def removeFinishedJobs(cls, group):
    """
    Remove all finished jobs from a specific group.

    jobs assigned: all jobs in the group

    A BossLite session is taken from the pool in cls.params['sessionPool'],
    the tracking DB is asked for the associated jobs and each of them is
    removed from the check queue of the given group.

    A BossLiteError is logged and not propagated. The session is closed on
    every exit path (nothing to remove, success, failure).
    """

    session = None
    try:
        session = BossLitePoolDB("MySQL", pool=cls.params['sessionPool'])
        db = TrackingDB(session)
        joblist = db.getAssociatedJobs()

        # in case of empty results
        if joblist is None:
            logging.debug(
                "No finished jobs to be removed from query queues"
            )
            return

        # each entry carries the two ids identifying a job
        for pair in joblist:
            db.removeFromCheck(group, pair[0], pair[1])

        logging.debug("Removed jobs from group %s" % str(group))

    except BossLiteError as ex:
        logging.error('Failed to remove jobs from queues: %s ' % ex)

    finally:
        # give the session back even on the early return / error paths;
        # a failing close is logged like any other BossLite failure
        if session is not None:
            try:
                session.close()
            except BossLiteError as ex:
                logging.error(
                    'Failed to remove jobs from queues: %s ' % ex
                )
def addNewJobs(cls):
    """
    Include new jobs in the set of jobs to be watched for.

    jobs assigned: all new jobs.

    A BossLite session is taken from the pool in cls.params['sessionPool'],
    the tracking DB is asked for the jobs not yet associated and each of
    them is registered for checking.

    A BossLiteError is logged and not propagated. The session is closed on
    every exit path (nothing to add, success, failure).
    """

    session = None
    try:
        session = BossLitePoolDB("MySQL", pool=cls.params['sessionPool'])
        db = TrackingDB(session)
        joblist = db.getUnassociatedJobs()

        # in case of empty results
        if joblist is None:
            logging.debug("No new jobs to be added in query queues")
            return

        # each entry carries the two ids identifying a job
        for pair in joblist:
            db.addForCheck(pair[0], pair[1])

    except BossLiteError as ex:
        # this routine adds jobs: report it as such
        logging.error('Failed to add jobs to queues: %s ' % ex)

    finally:
        # give the session back even on the early return / error paths;
        # a failing close is logged like any other BossLite failure
        if session is not None:
            try:
                session.close()
            except BossLiteError as ex:
                logging.error('Failed to add jobs to queues: %s ' % ex)
def __init__(self, schedType, **args):
    """
    __init__

    Record the scheduler type, reset the task and scheduler handles,
    declare the names of the status counters and open the BossLite
    session together with its pool and tracking DB wrapper.

    Extra keyword arguments are accepted but not used here.
    """

    # names of the per-status counters
    self.counters = [
        'pending', 'submitted', 'waiting', 'ready', 'scheduled',
        'running', 'cleared', 'created', 'other',
    ]

    self.scheduler = None
    self.bossTask = None
    self.schedType = schedType

    # dump the DB configuration before connecting
    print(dbConfig)

    # session created with its own pool; pool and tracking DB come from it
    api = BossLiteAPI('MySQL', dbConfig, makePool=True)
    self.bossLiteSession = api
    self.sessionPool = api.bossLiteDB.getPool()
    self.db = TrackingDB(api.bossLiteDB)
def doWork(cls, group):
    """
    Query the status of the jobs belonging to the given group.

    jobs assigned: all jobs in the group.

    Every task the tracking DB lists for the group is handed to
    cls.bossQuery. A BossLiteError is logged and not propagated.
    """

    logging.info(
        "%s Getting job status for jobs in group %s"
        % (cls.fullId(), str(group))
    )

    try:
        # session built on the shared pool, plus its tracking DB wrapper
        api = BossLiteAPI("MySQL", pool=cls.params['sessionPool'])
        tracker = TrackingDB(api.bossLiteDB)

        # one status query per task of the group
        for taskId in tracker.getGroupTasks(group):
            cls.bossQuery(api, int(taskId))

    except BossLiteError as error:
        logging.error(
            "%s JobTrackingThread exception: %s" % (cls.fullId(), error)
        )
class PilotBossMonitor(MonitorInterface):
    """
    _PilotBossMonitor_

    Monitor built on a BossLite session and its tracking DB.
    """

    def __init__(self, schedType, **args):
        """
        __init__

        Record the scheduler type, reset the task and scheduler handles,
        declare the status counter names and open the BossLite session
        with its pool and tracking DB wrapper.

        Extra keyword arguments are accepted but not used here.
        """

        # names of the per-status counters
        self.counters = [
            'pending', 'submitted', 'waiting', 'ready', 'scheduled',
            'running', 'cleared', 'created', 'other',
        ]

        self.scheduler = None
        self.bossTask = None
        self.schedType = schedType

        # dump the DB configuration before connecting
        print(dbConfig)

        # session created with its own pool; pool and tracking DB come
        # from it
        api = BossLiteAPI('MySQL', dbConfig, makePool=True)
        self.bossLiteSession = api
        self.sessionPool = api.bossLiteDB.getPool()
        self.db = TrackingDB(api.bossLiteDB)

    def monitorPilot(self, site, tqStateApi):
        """
        __monitorPilot__

        For the 'LSF' scheduler type, build the scheduler configuration
        and run a PABossLitePoll poller. Other scheduler types do nothing.

        The site and tqStateApi arguments are not used yet.
        """

        # NOTE(review): the original layout was lost; the poller calls are
        # assumed to belong to the LSF branch -- confirm.
        if self.schedType != 'LSF':
            return

        # NOTE(review): self.host is not assigned in __init__ above --
        # presumably provided elsewhere; confirm.
        # The configuration is only built: the scheduler instantiation
        # (Scheduler.Scheduler('SchedulerLsf', ...)) is currently disabled.
        lsfConfig = {'cpCmd': 'rfcp', 'rfioSer': '%s:' % self.host}

        pilotPoller = PABossLitePoll()
        pilotPoller()
        pilotTotal = poller_total = pilotPoller.pilotJobs

        # start the logic here

    def getGroups(self, group):
        """
        Print the id of every task the tracking DB lists for the group.
        """

        for taskId in self.db.getGroupTasks(group):
            print(taskId)

    def getStatistic(self):
        """
        __getStatistics__

        Poll the BOSS DB for a summary of the job status, print one line
        per counter and return the raw summary (None when the DB gave no
        result).
        """

        # summary of the jobs in the DB
        result = self.db.getJobsStatistic()
        if result is None:
            return result

        # status code -> counter name; 'E' is skipped, anything not
        # listed is accumulated under 'other'
        counterOf = {
            'R': 'running',
            'I': 'pending',
            'SW': 'waiting',
            'SR': 'ready',
            'SS': 'scheduled',
            'SU': 'submitted',
            'SE': 'cleared',
            'C': 'created',
        }

        counter = dict.fromkeys(self.counters, 0)

        for status, count in result:
            if status == 'E':
                continue
            name = counterOf.get(status)
            if name is None:
                counter['other'] += count
            else:
                counter[name] = count

        # display counters
        for name, value in counter.items():
            print(name + " jobs : " + str(value))
        print("....................")

        return result

    def pollJobs(self, runningAttrs, processStatus, skipStatus=None):
        """
        __pollJobs__

        basic structure for jobs polling: load the jobs matching
        runningAttrs in windows of self.jobLimit and bulk-update their
        process status, until a window comes back empty.

        A BossLiteError during the bulk update is logged and the next
        window is processed.
        """

        # NOTE(review): self.jobLimit is not assigned in __init__ above --
        # presumably provided elsewhere; confirm.
        offset = 0

        while True:
            logging.debug(
                "Max jobs to be loaded %s:%s "
                % (str(offset), str(offset + self.jobLimit))
            )

            self.newJobs = self.bossLiteSession.loadJobsByRunningAttr(
                runningAttrs=runningAttrs,
                limit=self.jobLimit,
                offset=offset,
            )
            logging.info("Polled jobs : " + str(len(self.newJobs)))

            # exit if no more jobs to query
            if self.newJobs == []:
                break

            offset += self.jobLimit

            try:
                self.db.processBulkUpdate(
                    self.newJobs, processStatus, skipStatus
                )
                logging.info(
                    "Changed status to %s for %s loaded jobs"
                    % (processStatus, str(len(self.newJobs)))
                )
            except BossLiteError as err:
                logging.error(
                    "Failed handling %s loaded jobs, waiting next round: %s"
                    % (processStatus, str(err))
                )
def applyPolicy(self):
    """
    __applyPolicy__

    apply policy.

    Load the unprocessed tasks (excluding the groups under processing)
    into self.jobPerTask and distribute them over new groups. Each group
    is filled first from the head of the list ("largest tasks") and then
    from its tail ("smallest tasks"), limited to 30 tasks and, apart from
    a single oversized task, to self.maxJobs jobs.

    The pooled session is always closed before returning.
    """

    # upper bound on the number of tasks per group
    maxTasks = 30

    # get DB session
    session = BossLitePoolDB("MySQL", pool=self.sessionPool)
    try:
        db = TrackingDB(session)

        # set policy parameters
        # NOTE(review): the grouping is only built and logged in this
        # block, neither stored nor returned -- confirm nothing is missing.
        groups = {}

        # get list of groups under processing
        grlist = ",".join(["%s" % k for k in self.groupsUnderProcessing])

        # get information about tasks associated to these groups
        self.jobPerTask = db.getUnprocessedJobs(grlist)

        # process all groups (a None result is treated as "nothing to do")
        grid = 0
        while self.jobPerTask:

            grid = grid + 1
            ntasks = 0

            # ignore groups under processing
            if grid in self.groupsUnderProcessing:
                logging.info("skipping group " + str(grid))
                continue

            # build group information
            groups[grid] = ''
            jobsReached = 0

            logging.info(
                'filling group ' + str(grid) + ' with largest tasks'
            )

            # fill group with the largest tasks (head of the list)
            while self.jobPerTask and ntasks < maxTasks:
                try:
                    task, jobs = self.jobPerTask[0]

                    # stop when there are enough jobs; an oversized task
                    # is still accepted when the group is empty
                    if jobsReached + int(jobs) > self.maxJobs \
                           and jobsReached != 0:
                        break

                    # add task to group
                    groups[grid] += str(task) + ','
                    jobsReached += int(jobs)
                    self.jobPerTask.pop(0)

                    # stop when there are too much tasks
                    ntasks += 1

                # go to next task
                except IndexError as ex:
                    self.jobPerTask.pop(0)
                    logging.info("\n\n" + str(ex) + "\n\n")
                    continue

            logging.info(
                'filling group ' + str(grid) + ' with the smallest tasks'
            )

            # fill group with the smallest tasks (tail of the list)
            while self.jobPerTask and ntasks < maxTasks:
                try:
                    # inspect the very element pop() removes below
                    task, jobs = self.jobPerTask[-1]

                    # stop when there are enough jobs
                    if jobsReached + int(jobs) > self.maxJobs:
                        break

                    # add task to group
                    groups[grid] += str(task) + ','
                    jobsReached += int(jobs)
                    self.jobPerTask.pop()

                    # stop when there are too much tasks
                    ntasks += 1

                # go to next task
                except IndexError:
                    self.jobPerTask.pop()
                    continue

            logging.info(
                "group " + str(grid) + " filled with tasks "
                + groups[grid] + " and total jobs " + str(jobsReached)
            )

    finally:
        # give the session back whatever happened
        session.close()
def bossQuery(cls, bossSession, taskId):
    """
    Perform the LB query through BOSS

    The jobs of the task are loaded in windows of cls.params['jobsToPoll']
    jobs; for each non-empty window the QueryStatus.py script is run as an
    external command on the job id range of the window. The loop ends at
    the first empty window.

    Nothing is done (an error is logged) when the scheduler of the task
    cannot be retrieved. TimeOut and BossLiteError are logged and the next
    window is tried.
    """

    logging.info(
        '%s Retrieving status for jobs of task %s'
        % (cls.fullId(), str(taskId))
    )

    # default values
    offset = 0
    runningAttrs = {'processStatus': '%handled', 'closed': 'N'}
    # normalised once so that every use sees the same integer
    jobsToPoll = int(cls.params['jobsToPoll'])

    # get scheduler
    db = TrackingDB(bossSession.bossLiteDB)
    scheduler = db.getTaskScheduler(taskId)
    if scheduler is None:
        logging.error(
            '%s Unable to retrieve Scheduler, skip check for task %s'
            % (cls.fullId(), str(taskId))
        )
        return
    del db

    # perform query
    while True:

        # the window moves forward exactly once per round, whether the
        # round succeeds or fails, so no window is skipped after an error
        start = offset
        offset += jobsToPoll

        try:
            task = bossSession.load(
                taskId, runningAttrs=runningAttrs, strict=False,
                limit=jobsToPoll, offset=start
            )

            # no more jobs to query
            if not task.jobs:
                break

            if task['user_proxy'] is None:
                task['user_proxy'] = ''

            # NOTE: the status query is delegated to an external process
            # instead of an in-process scheduler session
            # (BossLiteAPISched.query); the original comments describe
            # this as a workaround for a glite bug.
            jobRange = '%s:%s' % (
                task.jobs[0]['jobId'], task.jobs[-1]['jobId']
            )

            command = \
                'python ' + \
                '$PRODAGENT_ROOT/lib/JobTracking/QueryStatus.py ' + \
                str(taskId) + ' ' + jobRange + ' ' + scheduler + \
                ' ' + task['user_proxy']

            logging.debug(
                '%s EXECUTING: %s' % (cls.fullId(), str(command))
            )

            # the timeout grows with the number of jobs in the window
            msg, ret = executeCommand(command, len(task.jobs) * 30)
            logging.debug(
                "%s SUBPROCESS MESSAGE : \n%s " % (cls.fullId(), msg)
            )

            # log the end of the query
            logging.info(
                '%s LB status retrieved for jobs %s of task %s'
                % (cls.fullId(), jobRange, str(taskId))
            )

            # drop the window before loading the next one
            del task, msg, command

        except TimeOut as e:
            logging.error(
                "%s Failed to retrieve status for jobs of task %s : %s"
                % (cls.fullId(), str(taskId), str(e))
            )
            logging.error(
                "%s PARTIAL SUBPROCESS MESSAGE : \n%s"
                % (cls.fullId(), e.commandOutput())
            )

        except BossLiteError as e:
            logging.error(
                "%s Failed to retrieve status for jobs of task %s : %s"
                % (cls.fullId(), str(taskId), str(e))
            )