Beispiel #1
0
def getFailureReason(job):
    """Return the error text the grid reports for a job.

    The job's current grid id is obtained from ``job.getCurrentGID()``
    and used to fetch the job information through arclib.

    returns: the ``errors`` attribute of the retrieved job info, or ''
    when the lookup fails (the failure is logged, not raised).
    """

    gridJobId = job.getCurrentGID()

    try:
        # NOTE(review): the trailing arguments (True, "", 30) are passed
        # through unchanged; 30 is presumably a timeout in seconds --
        # confirm against the arclib API.
        gridJobInfo = arclib.GetJobInfo(gridJobId, arclib.MDS_FILTER_JOBINFO,
                                        True, "", 30)
    except Exception as err:
        # Best effort: a failed lookup must not break the caller.
        # Catching Exception (instead of a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate, and logging the
        # exception instance keeps its message, not just its type.
        debug(err)
        info('Job %s failure reason unknown' % job.id)
        return ''

    return gridJobInfo.errors
Beispiel #2
0
    def load(self):
        """Refresh the cached queue information and job statuses.

        Stores the result of ``arclib.GetQueueInfo()`` in ``self.qinfo``,
        rebuilds ``self.cachemap`` as a mapping of job id to job status
        for every job id returned by ``arclib.GetJobIDsList()``, and
        records the refresh time in ``self.refreshTime``.
        """
        debug('refreshing qinfo')
        self.qinfo = arclib.GetQueueInfo()

        debug('refreshing job statuses')
        jobIds = arclib.GetJobIDsList()
        # NOTE(review): the trailing arguments (True, "", 30) are passed
        # through unchanged; 30 is presumably a timeout in seconds --
        # confirm against the arclib API.
        jobs = arclib.GetJobInfo(jobIds, arclib.MDS_FILTER_JOBINFO, True, "",
                                 30)

        # Build the complete map first and publish it with a single
        # assignment, so self.cachemap is never seen half-filled.
        newmap = {job.id: job.status for job in jobs}

        self.cachemap = newmap
        self.refreshTime = time.time()
Beispiel #3
0
    def jobStatus(self, jobId):
        """Retrieve status of a particular job.

           The result always contains the keys name, status and error.
           For a job that is not known, the defaults
           ('UNKNOWN', 'NOT FOUND', -1) are returned.  When the job
           could be queried, submitted and completed are added as
           strings.  When the query raises arclib.ARCLibError, status
           holds 'UNABLE TO RETRIEVE: <reason>', error is 255 and
           submitted is 'unknown'.

           returns: dictionary containing keys name, status, error...
           (see AllJobStatus)."""

        logger.debug('Requesting job status for %s.' % jobId)

        jobInfo = {'name': 'UNKNOWN', 'status': 'NOT FOUND', 'error': -1}

        # check if we know this job at all; always release the lock,
        # even if arclib raises.
        self.__lockArclib()
        try:
            job_ = arclib.GetJobIDs([jobId])
        finally:
            self.__unlockArclib()

        # GetJobIDs reports an unknown job with an empty name rather
        # than an error; an empty result is treated the same way
        # instead of failing with IndexError.
        jobNames = [j for j in job_]
        jobName = jobNames[0] if jobNames else ''
        if jobName == '':  # job not found
            logger.debug('Job %s was not found.' % jobId)
        else:
            jobInfo['name'] = jobName

            self.__lockArclib()
            try:
                logger.debug('Querying job %s (%s)' % (jobId, jobName))
                info = arclib.GetJobInfo(jobId)
                jobInfo['status'] = info.status
                jobInfo['error'] = info.exitcode
                jobInfo['submitted'] = str(info.submission_time)
                jobInfo['completed'] = str(info.completion_time)
            except arclib.ARCLibError as err:
                reason = err.what()
                logger.error('Could not query: %s' % reason)
                # Must be a plain string: a trailing comma here would
                # silently store a 1-tuple instead.
                jobInfo['status'] = 'UNABLE TO RETRIEVE: ' + reason
                jobInfo['error'] = 255
                jobInfo['submitted'] = 'unknown'
            finally:
                # Release the lock on every exit path, including
                # exceptions other than ARCLibError.
                self.__unlockArclib()
        logger.debug(' Returned %s' % jobInfo)
        return jobInfo
Beispiel #4
0
    def AllJobStatus(self):
        """Query status of jobs in joblist.

        The command returns a dictionary of jobIDs. Each item
        in the dictionary consists of an additional dictionary with the
        attributes:

            name = Job name
            status = ARC job states, ACCEPTED, SUBMIT, INLRMS etc,
                     or 'REMOVED' if the job could not be queried
            error = Exit code of the job, -1 if it could not be queried
            submitted = str(submission_time), None if not available
            completed = str(completion_time), None if not available

        If the '.ngjobs' file does not exist or the job IDs cannot be
        retrieved, an empty dictionary is returned.

        Example:

            jobList = ui.AllJobStatus()

            print(jobList['gsiftp://...3217']['name'])
            print(jobList['gsiftp://...3217']['status'])

        @rtype: dict
        @return: job status dictionary."""

        logger.debug('Requesting job status for all jobs.')

        jobList = {}

        # GetJobIDs returns a multimap, mapping job names to JobIDs...
        self.__lockArclib()
        try:
            # ATTENTION: GetJobIDs does not throw an exception
            # if the .ngjobs file is not found. Instead, it
            # only complains on stderr and returns {}.
            if not os.path.isfile(os.path.join(self._userdir, '.ngjobs')):
                logger.debug('No Job file found, skipping')
                return jobList
            jobIds = arclib.GetJobIDs()
        except Exception as err:
            logger.error('could not get job IDs: %s', err)
            return jobList
        finally:
            # Release the lock on every exit path, including the early
            # return for a missing job file.
            self.__unlockArclib()

        # use an iterator over the multimap elements and advance it
        # exactly size() times: calling next() past the last element
        # segfaults instead of raising StopIteration.
        jobIter = jobIds.begin()
        for _ in range(jobIds.size()):
            (jobName, jobId) = next(jobIter)
            logger.debug('Querying job %s (%s)' % (jobId, jobName))
            status = None
            exitCode = None
            sub_time = None
            completed = None

            self.__lockArclib()
            try:
                jobInfo = arclib.GetJobInfo(jobId)
                status = jobInfo.status
                exitCode = jobInfo.exitcode
                sub_time = str(jobInfo.submission_time)
                completed = str(jobInfo.completion_time)
            except arclib.FTPControlError:
                logger.error('Failed to query job %s' % jobName)
                status = 'REMOVED'
                exitCode = -1
            finally:
                # Never leave the lock held, whatever the query raised.
                self.__unlockArclib()

            jobList[jobId] = {
                'name': jobName,
                'status': status,
                'error': exitCode,
                'submitted': sub_time,
                'completed': completed,
            }
            logger.debug(' %s: %s' % (jobId, jobList[jobId]))

        return jobList
Beispiel #5
0
#                               '.ngjobs'), 'r')
#        lines = jobListFile.readlines()
#        jobListFile.close()
#        for line in lines:
#            (jobId, jobName) = line.strip().split('#')
            logger.debug('Querying job %s (%s)' % (jobId, jobName))
            jobList[jobId] = {}
            jobList[jobId]['name'] = jobName
            status = None
            exitCode = None
            sub_time = None

            self.__lockArclib()
            try:
                # jobInfo = arclib.GetJobInfoDirect(jobId)
                jobInfo  = arclib.GetJobInfo(jobId)
                status   = jobInfo.status
                exitCode = jobInfo.exitcode
                sub_time = jobInfo.submission_time.__str__()
                completed= jobInfo.completion_time.__str__()
                # cpu_time = jobInfo.used_cpu_time.__str__()
                # wall_time= jobInfo.used_wall_time.__str__()

            except arclib.FTPControlError:
                logger.error('Failed to query job %s' % jobName)
                status = 'REMOVED'
                exitCode = -1
                completed = None
                cpu_time = None
                wall_time = None
            self.__unlockArclib()