Example #1
    def removeFinishedJobs(cls, group):
        """
        remove all finished jobs from a specific group.

        jobs assigned: all jobs in the group

        """

        try:
            session = BossLitePoolDB( "MySQL", pool=cls.params['sessionPool'] )
            db = TrackingDB( session )
            joblist = db.getAssociatedJobs()

            # in case of empty results
            if joblist is None:
                logging.debug(
                    "No finished jobs to be removed from query queues" )
                return

            for pair in joblist:
                db.removeFromCheck( group, pair[0], pair[1] )
                #logging.debug(
                #    "Removing jobs for group " + str(group) \
                #    + " with BOSS id " +  str( pair[0] ) + '.' \
                #    + str( pair[1] )\
                #    )
            logging.debug("Removed jobs from group %s" % str(group) )
            session.close()
            del joblist

        except BossLiteError, ex:
            logging.error( 'Failed to remove jobs from queues: %s ' % ex )
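
A minimal calling sketch for the method above. The enclosing class is not shown in these snippets, so the class name JobStatusPoller used here is a placeholder and the group ids are made up:

    # Hypothetical driver: sweep each tracked group and drop its finished
    # jobs from the query queues.  JobStatusPoller and the group ids are
    # assumed names/values, not part of the original code.
    for group in [1, 2, 3]:
        JobStatusPoller.removeFinishedJobs(group)
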
Example #2
    def addNewJobs(cls):
        """
        include new jobs in the set of jobs to be watched for.

        jobs assigned: all new jobs.

        """

        try:

            session = BossLitePoolDB( "MySQL", pool=cls.params['sessionPool'] )
            db = TrackingDB( session )
            joblist = db.getUnassociatedJobs()

            # in case of empty results
            if joblist is None:
                logging.debug( "No new jobs to be added in query queues")
                return

            for pair in joblist:
                db.addForCheck( pair[0],  pair[1] )

                #logging.debug(\
                #    "Adding jobs to queue with BOSS id "\
                #    +  str( pair[0] ) + '.' + str( pair[1] )\
                #    )
            session.close()
            del joblist

        except BossLiteError, ex:
            logging.error( 'Failed to add new jobs to queues: %s' % ex )
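
addNewJobs is the counterpart of removeFinishedJobs from Example #1: together they keep the check queues in sync with the job database. A hedged sketch of such a maintenance cycle, again with an assumed class name, group list and polling interval:

    import time

    # Hypothetical maintenance loop: first enqueue jobs not yet associated
    # with a group, then purge finished ones group by group.  The 60 s
    # interval is an assumption.
    while True:
        JobStatusPoller.addNewJobs()
        for group in [1, 2, 3]:
            JobStatusPoller.removeFinishedJobs(group)
        time.sleep(60)
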
Example #3
    def __init__(self, schedType, **args):
        """ __init__ """

        self.schedType = schedType
        self.bossTask = None
        self.scheduler = None
        self.counters = ['pending', 'submitted', 'waiting', 'ready', \
                         'scheduled', 'running', 'cleared', 'created', 'other'] 
        print dbConfig 
        self.bossLiteSession = BossLiteAPI('MySQL', dbConfig, makePool=True)
        self.sessionPool = self.bossLiteSession.bossLiteDB.getPool()
        self.db = TrackingDB( self.bossLiteSession.bossLiteDB )
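
This constructor reappears in Example #5 as part of PilotBossMonitor. A minimal construction sketch, assuming dbConfig is already available in the module namespace (the exact import path is an assumption):

    # dbConfig must be importable before construction, e.g. something like
    # "from ProdAgentDB.Config import defaultConfig as dbConfig" (path assumed).
    monitor = PilotBossMonitor('LSF')
    print(monitor.counters)
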
Example #4
    def doWork(cls, group):
        """
        get the status of the jobs in the group.

        jobs assigned: all jobs in the group.

        """

        logging.info("%s Getting job status for jobs in group %s" \
                     %( cls.fullId(), str(group) ) )

        try:
            # get DB sessions
            bossSession = BossLiteAPI("MySQL", pool=cls.params['sessionPool'])
            db = TrackingDB( bossSession.bossLiteDB )
            tasks = db.getGroupTasks(group)

            for taskId in tasks :
                cls.bossQuery( bossSession, int(taskId) )

        except BossLiteError, ex:
            logging.error( "%s JobTrackingThread exception: %s" \
                           %( cls.fullId(), ex ) )
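
doWork fans out one bossQuery call per task in the group (bossQuery itself is shown in Example #7). A hedged dispatch sketch; the class name, the group ids and the one-thread-per-group layout are assumptions:

    import threading

    # Hypothetical fan-out: one tracking thread per group, each running the
    # status-query pass above.
    threads = [threading.Thread(target=JobStatusPoller.doWork, args=(g,))
               for g in [1, 2, 3]]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
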
Example #5
class PilotBossMonitor(MonitorInterface):
    """ 
    _PilotBossMonitor_ 
    """
    def __init__(self, schedType, **args):
        """ __init__ """

        self.schedType = schedType
        self.bossTask = None
        self.scheduler = None
        self.counters = ['pending', 'submitted', 'waiting', 'ready', \
                         'scheduled', 'running', 'cleared', 'created', 'other'] 
        print dbConfig 
        self.bossLiteSession = BossLiteAPI('MySQL', dbConfig, makePool=True)
        self.sessionPool = self.bossLiteSession.bossLiteDB.getPool()
        self.db = TrackingDB( self.bossLiteSession.bossLiteDB )
    
    def monitorPilot(self, site, tqStateApi):
        """ __monitorPilot__ 
        This function will submit pilot jobs using
        the selected mechanism  
        """
         
        if ( self.schedType == 'LSF' ):
            #schedConfig = {'user_proxy' : '', 'service' : '', 'config' : '' }
            schedConfig = {'cpCmd': 'rfcp', 'rfioSer': '%s:' % self.host}
            #self.scheduler = Scheduler.Scheduler('SchedulerLsf', schedConfig)
            
            poller = PABossLitePoll()
            poller()
            total = poller.pilotJobs
            #start the logic here
 
    def getGroups(self, group):
        tasks = self.db.getGroupTasks(group)

        for taskId in tasks :        
            print taskId

    def getStatistic(self):
        """
        __getStatistic__

        Poll the BOSS DB for a summary of the job status

        """

        # summary of the jobs in the DB
        result = self.db.getJobsStatistic()

        if result is not None:

            counter = {}
            for ctr in self.counters:
                counter[ctr] = 0

            for pair in result :
                status, count = pair
                if status == 'E':
                    continue
                elif status == 'R' :
                    counter['running'] = count
                elif status == 'I':
                    counter['pending'] = count
                elif status == 'SW' :
                    counter['waiting'] = count
                elif status == 'SR':
                    counter['ready'] = count
                elif status == 'SS':
                    counter['scheduled'] = count
                elif status == 'SU':
                    counter['submitted'] = count
                elif status == 'SE':
                    counter['cleared'] = count
                elif status == 'C':
                    counter['created'] = count
                else:
                    counter['other'] += count

            # display counters
            for ctr, value in counter.iteritems():
                print(ctr + " jobs : " + str(value))
            print("....................")

            return result


    def pollJobs(self, runningAttrs, processStatus, skipStatus=None ):
        """
        __pollJobs__

        basic structure for jobs polling

        """

        offset = 0
        loop = True

        while loop :

            logging.debug("Max jobs to be loaded %s:%s " % \
                         (str( offset ), str( offset + self.jobLimit) ) )

            self.newJobs = self.bossLiteSession.loadJobsByRunningAttr(
                runningAttrs=runningAttrs, \
                limit=self.jobLimit, offset=offset
                )

            logging.info("Polled jobs : " + str( len(self.newJobs) ) )

            # exit if no more jobs to query
            if self.newJobs == [] :
                loop = False
                break
            else :
                offset += self.jobLimit

            try:
                self.db.processBulkUpdate( self.newJobs, processStatus, \
                                           skipStatus )
                logging.info( "Changed status to %s for %s loaded jobs" \
                              % ( processStatus, str( len(self.newJobs) ) ) )

            except BossLiteError, err:
                logging.error(
                    "Failed handling %s loaded jobs, waiting next round: %s" \
                    % ( processStatus, str( err ) ) )
                continue
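
A usage sketch for the two polling entry points above. The runningAttrs filter mirrors the one used in Example #7; the concrete status values and the jobLimit page size are assumptions (note that jobLimit is read by pollJobs but not set in __init__):

    # Hypothetical polling pass: print the per-status summary, then flag the
    # jobs matched by runningAttrs with a new processStatus.
    monitor = PilotBossMonitor('LSF')
    monitor.jobLimit = 500          # page size, assumed value
    monitor.getStatistic()
    monitor.pollJobs( runningAttrs={'processStatus': 'not_handled',
                                    'closed': 'N'},
                      processStatus='handled' )
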
Example #6
    def applyPolicy(self):
        """
        __applyPolicy__

        apply policy.
        """

        # get DB session
        session = BossLitePoolDB( "MySQL", pool=self.sessionPool )
        db = TrackingDB( session )

        # set policy parameters
        groups = {}

        # get list of groups under processing
        grlist = ",".join(["%s" % k for k in self.groupsUnderProcessing])

        # get information about tasks associated to these groups
        self.jobPerTask = db.getUnprocessedJobs( grlist )

        # process all groups
        grid = 0
        while self.jobPerTask != [] :

            grid = grid + 1
            ntasks = 0

            # ignore groups under processing
            if grid in self.groupsUnderProcessing:
                logging.info( "skipping group " + str(grid))
                continue

            # build group information
            groups[grid] = ''
            jobsReached = 0

            logging.info('filling group ' + str(grid) + ' with largest tasks')

            # fill group with the largest tasks
            while self.jobPerTask != [] and ntasks < 30 :
                try:

                    task, jobs = self.jobPerTask[0]

                    # stop when there are enough jobs
                    if jobsReached + int(jobs) > self.maxJobs \
                           and jobsReached != 0:
                        break

                    # add task to group
                    groups[grid] += str(task) + ','
                    jobsReached += int(jobs)
                    self.jobPerTask.pop(0)

                    # stop when there are too many tasks
                    ntasks += 1

                # go to next task
                except IndexError, ex:
                    self.jobPerTask.pop(0)
                    logging.info("\n\n" + str(ex) + "\n\n")
                    continue

            logging.info('filling group ' + str(grid) + \
                          ' with the smallest tasks')

            # fill group with the smallest tasks
            while self.jobPerTask != [] and ntasks < 30 :
                try:

                    task, jobs = self.jobPerTask[0]

                    # stop when there are enough jobs
                    if jobsReached + int(jobs)  > self.maxJobs:
                        break

                    # add task to group
                    groups[grid] += str(task) + ','
                    jobsReached += int(jobs)
                    self.jobPerTask.pop()

                    # stop when there are too many tasks
                    ntasks += 1

                # go to next task
                except IndexError:
                    self.jobPerTask.pop()
                    continue

            logging.info("group " + str(grid) + " filled with tasks " \
                          + groups[grid] + " and total jobs " \
                          + str(jobsReached))
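
The core of applyPolicy is a greedy packing heuristic: each new group is filled first with the largest remaining tasks, then topped up with the smallest ones, until either maxJobs jobs or 30 tasks are reached. Below is a self-contained sketch of that heuristic with made-up numbers; the real method additionally skips group ids already under processing and stores each group as a comma-separated string of task ids:

    def packTasks(jobPerTask, maxJobs, maxTasks=30):
        """
        Greedy packing sketch: jobPerTask is a list of (taskId, nJobs)
        pairs, largest first, as the loop above assumes.
        """
        groups = {}
        grid = 0
        while jobPerTask:
            grid += 1
            ntasks = 0
            jobsReached = 0
            groups[grid] = []

            # fill the group with the largest remaining tasks
            while jobPerTask and ntasks < maxTasks:
                task, jobs = jobPerTask[0]
                if jobsReached + int(jobs) > maxJobs and jobsReached != 0:
                    break
                groups[grid].append(task)
                jobsReached += int(jobs)
                jobPerTask.pop(0)
                ntasks += 1

            # then top it up with the smallest remaining tasks
            while jobPerTask and ntasks < maxTasks:
                task, jobs = jobPerTask[-1]
                if jobsReached + int(jobs) > maxJobs:
                    break
                groups[grid].append(task)
                jobsReached += int(jobs)
                jobPerTask.pop()
                ntasks += 1

        return groups

    # prints {1: [1, 5, 4], 2: [2, 3]} for this made-up input
    print(packTasks([(1, 800), (2, 500), (3, 200), (4, 40), (5, 10)], 1000))
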
Example #7
    def bossQuery( cls, bossSession, taskId ):
        """
        Perform the LB query through BOSS
        """

        logging.info('%s Retrieving status for jobs of task %s'  \
                     % ( cls.fullId(), str(taskId) ) )

        # default values
        offset = 0
        loop = True
        jobRange = ''
        runningAttrs = {'processStatus': '%handled',
                        'closed' : 'N'}
        jobsToPoll = cls.params['jobsToPoll']

        # get scheduler        
        db = TrackingDB( bossSession.bossLiteDB )
        scheduler = db.getTaskScheduler(taskId)
        if scheduler is None:
            logging.error(
                '%s Unable to retrieve Scheduler, skip check for task  %s' \
                % ( cls.fullId(), str(taskId) )
                )
            return
        del db

        # perform query
        while loop :
            try :
                task = bossSession.load(
                    taskId, runningAttrs=runningAttrs, \
                    strict=False, \
                    limit=jobsToPoll, offset=offset )

                if task.jobs == [] :
                    loop = False
                    break
                else:
                    offset += jobsToPoll

                if task['user_proxy'] is None :
                    task['user_proxy'] = ''

                # # this is the correct way...
                # Scheduler session
                # schedulerConfig = { 'timeout' : len( task.jobs ) * 30 }
                #
                # schedSession = \
                #        BossLiteAPISched( bossSession, schedulerConfig, task )
                #
                # task = schedSession.query( task, queryType='parent' )
                #
                # for job in task.jobs :
                #     print job.runningJob['jobId'], \
                #           job.runningJob['schedulerId'], \
                #           job.runningJob['statusScheduler'], \
                #           job.runningJob['statusReason']

                # # this is a workaround for the glite bug...
                jobRange = '%s:%s' % ( task.jobs[0]['jobId'], \
                                       task.jobs[-1]['jobId'] )

                command = \
                        'python ' + \
                        '$PRODAGENT_ROOT/lib/JobTracking/QueryStatus.py ' + \
                        str(taskId) + ' ' + jobRange + ' ' + scheduler + \
                        ' ' + task['user_proxy']

                logging.debug('%s EXECUTING: %s' \
                              % (cls.fullId(), str(command)))
                msg, ret = executeCommand( command, len( task.jobs ) * 30 )
                logging.debug( "%s SUBPROCESS MESSAGE : \n%s " % \
                               (cls.fullId(), msg ) )

                # log the end of the query
                logging.info('%s LB status retrieved for jobs %s of task %s' \
                             %(cls.fullId(), jobRange, str(taskId) ) )
                del task, msg, command

            except TimeOut, e:
                logging.error(
                    "%s Failed to retrieve status for jobs of task %s : %s" \
                    % (cls.fullId(), str(taskId), str( e ) ) )
                logging.error( "%s PARTIAL SUBPROCESS MESSAGE : \n%s" \
                               % (cls.fullId(),  e.commandOutput() ) )
                offset += int(cls.params['jobsToPoll'])

            except BossLiteError, e:
                logging.error(
                    "%s Failed to retrieve status for jobs of task %s : %s" \
                    % (cls.fullId(), str(taskId), str( e ) ) )
                offset += int(cls.params['jobsToPoll'])
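
The commented-out block above shows the intended BossLiteAPISched query; the code actually shells out to QueryStatus.py as a workaround for the glite bug. A standalone illustration of how that command string is assembled (all concrete values below are made up):

    # Hypothetical stand-ins for the task id, the first/last jobId of the
    # loaded page, the scheduler name and task['user_proxy'].
    taskId = 42
    jobRange = '%s:%s' % (1, 250)
    scheduler = 'SchedulerGLiteAPI'
    userProxy = '/tmp/x509up_u501'

    command = 'python ' + \
              '$PRODAGENT_ROOT/lib/JobTracking/QueryStatus.py ' + \
              str(taskId) + ' ' + jobRange + ' ' + scheduler + \
              ' ' + userProxy

    # bossQuery runs this via executeCommand() with a 30 s-per-job timeout
    print(command)
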