Example #1
0
    def master_resubmit(self,jobs):
        '''Resubmit failed Jedi job'''
        from pandatools import Client

        jobIDs = {}
        for job in jobs: 
            jobIDs[job.backend.id] = job

        allJobIDs = jobIDs.keys()
        pandaJobIDs = {}
        for jID in allJobIDs:
            with inject_proxy(self.credential_requirements):
                status, jediTaskDict = Client.getJediTaskDetails({'jediTaskID': jID},False,True,verbose=False)
            if status != 0:
                logger.error("Failed to get task details for %s" % jID)
                raise BackendError('Jedi','Return code %d retrieving job status information.' % status)

            # Retrieve job
            job = jobIDs[jediTaskDict['jediTaskID']]
       
            newJobsetID = -1 # get jobset
            retryJobs = [] # jspecs
            resubmittedJobs = [] # ganga jobs

            if jediTaskDict['status'] in ['failed', 'killed', 'cancelled', 'aborted', 'broken', 'finished' ]:
                retryJobs.append(job)
                resubmittedJobs.append(jID)
            #elif jediTaskDict['status'] == 'finished':
            #    pass
            else:
                logger.warning("Cannot resubmit. Jedi task %s is status %s." %(jID, jediTaskDict['status'] ))
                return False

            # submit
            if len(retryJobs)==0:
                logger.warning("No failed jobs to resubmit")
                return False

            with inject_proxy(self.credential_requirements):
                status,out = Client.retryTask(jID, verbose=False)
            if status != 0:
                logger.error(status)
                logger.error(out)
                logger.error("Failed to retry JobID=%s" % jID)                                                                                         
                return False
            tmpStat,tmpDiag = out
            if not tmpStat:
                logger.error(tmpDiag)
                logger.error("Failed to retry JobID=%s" % jID)
                return False
            logger.info(tmpDiag)
       
            job.backend.status = None
            job.backend.jobSpec = {}
            job.updateStatus('submitted')

        logger.info('Resubmission successful')
        return True
Example #2
0
    def master_submit(self,rjobs,subjobspecs,buildjobspec):
        '''Submit jobs'''
       
        from pandatools import Client
        from pandatools import MiscUtils

        from Ganga.Core import IncompleteJobSubmissionError
        from Ganga.Utility.logging import log_user_exception

        job = self.getJobObject()

        # job name
        jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen()

        jobspecs = {}
        if buildjobspec:
            jobspecs = buildjobspec
        else:
            jobspecs = subjobspecs

        logger.debug(jobspecs)

        # submit task
        for subjob in rjobs:
            subjob.updateStatus('submitting')

        logger.info("Submitting to Jedi ...")
        verbose = logger.isEnabledFor(10)
        with inject_proxy(self.credential_requirements):
            status, tmpOut = Client.insertTaskParams(jobspecs, verbose)

        logger.debug(tmpOut)

        if status != 0:
            logger.error("Task submission to Jedi failed with %s " %status)
            return False
        if tmpOut[0] == False:
            logger.error("Task submission to Jedi failed %s" %tmpOut[1])
            return False
        logger.info("Task submission to Jedi suceeded with new jediTaskID=%s" %tmpOut[1])

        #if buildjobspec:
        #    job.backend.buildjob = PandaBuildJob() 
        #    job.backend.buildjob.id = jobids[0][0]
        #    job.backend.buildjob.url = 'http://panda.cern.ch/?job=%d'%jobids[0][0]
        #    del jobids[0]

        for subjob in rjobs:
            subjob.backend.id = tmpOut[1]
            subjob.backend.url = 'http://pandamon.cern.ch/jedi/taskinfo?days=20&task=%d'%tmpOut[1]
            subjob.updateStatus('submitted')
            logger.info("Panda monitor url: %s" %subjob.backend.url)

        return True
Example #3
0
    def master_kill(self):
        '''Kill jobs'''  

        from pandatools import Client

        job = self.getJobObject()
        logger.debug('Killing job %s' % job.getFQID('.'))

        active_status = [ None, 'registered', 'waiting', 'defined', 'pending', 'assigning', 'ready', 'scouting', 'running', 'holding', 'merging', 'prepared', 'aborting', 'finishing' ]
        #active_status = [ None, 'defined', 'unknown', 'assigned', 'waiting', 'activated', 'sent', 'starting', 'running', 'holding', 'transferring' ]

        if self.id and self.status in active_status:
            with inject_proxy(self.credential_requirements):
                status, output = Client.killTask(self.id)
            if status:
                logger.error('Failed killing job (status = %d)',status)
                return False
            else:
                logger.info('Killing Jedi task %s, Server returned: %s' %(self.id, output))
        else:
            logger.error('Cannot kill Jedi job %s since it is not in active status', self.id)

        return True