def getFailureReason(job):
    """Return the error text reported by the grid for *job*.

    The job is looked up by its current grid ID (``job.getCurrentGID()``)
    through ``arclib.GetJobInfo``.

    Returns:
        The ``errors`` attribute of the retrieved job info, or ``''`` when
        the query raised an exception.
    """
    gridJobId = job.getCurrentGID()
    try:
        # NOTE(review): the trailing arguments (True, "", 30) are passed
        # through unchanged; 30 is presumably a timeout -- confirm against
        # the arclib documentation.
        gridJobInfo = arclib.GetJobInfo(
            gridJobId, arclib.MDS_FILTER_JOBINFO, True, "", 30)
    except Exception as err:
        # Best-effort lookup: a failed query must not abort the caller.
        # ``Exception`` (rather than a bare ``except``) lets SystemExit and
        # KeyboardInterrupt propagate, and logging the instance keeps the
        # actual error message instead of only the exception class.
        debug(err)
        info('Job %s failure reason unknown' % job.id)
        return ''
    return gridJobInfo.errors
def load(self):
    """Refresh the cached queue information and job statuses from arclib.

    Side effects:
        * ``self.qinfo`` is set to the result of ``arclib.GetQueueInfo()``.
        * ``self.cachemap`` is replaced by a new dict mapping each job's
          ``id`` to its ``status``, for every job returned by
          ``arclib.GetJobInfo`` on the IDs from ``arclib.GetJobIDsList()``.
        * ``self.refreshTime`` is set to the current ``time.time()``.
    """
    debug('refreshing qinfo')
    self.qinfo = arclib.GetQueueInfo()

    debug('refreshing job statuses')
    jobIds = arclib.GetJobIDsList()
    # NOTE(review): the trailing arguments (True, "", 30) are passed through
    # unchanged; 30 is presumably a timeout -- confirm against arclib docs.
    jobs = arclib.GetJobInfo(jobIds, arclib.MDS_FILTER_JOBINFO, True, "", 30)

    # Build the complete map before assigning it, so the previous cachemap
    # stays in place until the new one is fully constructed.
    self.cachemap = {job.id: job.status for job in jobs}
    self.refreshTime = time.time()
def jobStatus(self, jobId):
    """Retrieve the status of a single job.

    Args:
        jobId: identifier of the job, passed to ``arclib.GetJobIDs`` and
            ``arclib.GetJobInfo``.

    Returns:
        A dictionary that always contains ``name``, ``status`` and
        ``error``:

        * job not known: ``name='UNKNOWN'``, ``status='NOT FOUND'``,
          ``error=-1``.
        * query succeeded: ``status`` and ``error`` hold the job's status
          and exit code; ``submitted`` and ``completed`` hold the string
          forms of the submission and completion times.
        * query raised ``arclib.ARCLibError``: ``status`` is the string
          ``'UNABLE TO RETRIEVE: <reason>'``, ``error`` is 255 and
          ``submitted`` is ``'unknown'``.
    """
    logger.debug('Requesting job status for %s.', jobId)
    jobInfo = {'name': 'UNKNOWN', 'status': 'NOT FOUND', 'error': -1}

    # Check whether this job is known at all. The lock is released in a
    # ``finally`` so an unexpected exception cannot leave it held.
    self.__lockArclib()
    try:
        job_ = arclib.GetJobIDs([jobId])
    finally:
        self.__unlockArclib()

    # GetJobIDs does not fail for an unknown job; according to the original
    # author's note it returns an entry with an empty name instead. An
    # entirely empty result is treated the same way rather than raising
    # IndexError.
    names = list(job_)
    jobName = names[0] if names else ''

    if jobName == '':
        logger.debug('Job %s was not found.', jobId)
    else:
        jobInfo['name'] = jobName
        self.__lockArclib()
        try:
            logger.debug('Querying job %s (%s)', jobId, jobName)
            info = arclib.GetJobInfo(jobId)
            jobInfo['status'] = info.status
            jobInfo['error'] = info.exitcode
            jobInfo['submitted'] = str(info.submission_time)
            jobInfo['completed'] = str(info.completion_time)
        except arclib.ARCLibError as err:
            reason = err.what()
            logger.error('Could not query: %s', reason)
            # Must be a plain string: a trailing comma here would turn the
            # status into a 1-tuple.
            jobInfo['status'] = 'UNABLE TO RETRIEVE: ' + reason
            jobInfo['error'] = 255
            jobInfo['submitted'] = 'unknown'
        finally:
            self.__unlockArclib()

    logger.debug(' Returned %s', jobInfo)
    return jobInfo
def AllJobStatus(self):
    """Query the status of every job in the user's job list.

    The result is a dictionary keyed by job ID. Each value is itself a
    dictionary with the entries:

        name      = job name
        status    = job status as reported by arclib, or 'REMOVED' if the
                    job could not be queried
        error     = job exit code, or -1 if the job could not be queried
        submitted = str(submission_time), or None if unavailable
        completed = str(completion_time), or None if unavailable

    If the job list file (``.ngjobs`` in the user directory) does not
    exist, or the job IDs cannot be retrieved, an empty dictionary is
    returned.

    Example:

        jobList = ui.AllJobStatus()

        print(jobList['gsiftp://...3217']['name'])
        print(jobList['gsiftp://...3217']['status'])

    @rtype: dict
    @return: job status dictionary.
    """
    logger.debug('Requesting job status for all jobs.')
    jobList = {}

    # GetJobIDs returns a multimap, mapping job names to job IDs.
    self.__lockArclib()
    try:
        # ATTENTION: GetJobIDs does not throw an exception if the .ngjobs
        # file is not found. Instead, it only complains on stderr and
        # returns {}. Hence the explicit existence check.
        if not os.path.isfile(os.path.join(self._userdir, '.ngjobs')):
            logger.debug('No Job file found, skipping')
            return jobList
        jobIds = arclib.GetJobIDs()
    except Exception as err:
        logger.error('could not get job IDs: %s', err)
        return jobList
    finally:
        # Runs on every exit path, including the early returns above, so
        # the arclib lock is never left held.
        self.__unlockArclib()

    # Walk the multimap with its own iterator, pulling exactly size()
    # elements: calling next() once more at the end segfaults (per the
    # original author's note), so a plain ``for`` over the iterator is
    # deliberately avoided.
    entries = jobIds.begin()
    for _ in range(jobIds.size()):
        jobName, jobId = next(entries)
        logger.debug('Querying job %s (%s)', jobId, jobName)

        status = None
        exitCode = None
        sub_time = None
        completed = None

        self.__lockArclib()
        try:
            jobInfo = arclib.GetJobInfo(jobId)
            status = jobInfo.status
            exitCode = jobInfo.exitcode
            sub_time = str(jobInfo.submission_time)
            completed = str(jobInfo.completion_time)
        except arclib.FTPControlError:
            logger.error('Failed to query job %s' % jobName)
            status = 'REMOVED'
            exitCode = -1
            completed = None
        finally:
            self.__unlockArclib()

        jobList[jobId] = {
            'name': jobName,
            'status': status,
            'error': exitCode,
            'submitted': sub_time,
            'completed': completed,
        }
        logger.debug(' %s: %s', jobId, jobList[jobId])

    return jobList
# '.ngjobs'), 'r') # lines = jobListFile.readlines() # jobListFile.close() # for line in lines: # (jobId, jobName) = line.strip().split('#') logger.debug('Querying job %s (%s)' % (jobId, jobName)) jobList[jobId] = {} jobList[jobId]['name'] = jobName status = None exitCode = None sub_time = None self.__lockArclib() try: # jobInfo = arclib.GetJobInfoDirect(jobId) jobInfo = arclib.GetJobInfo(jobId) status = jobInfo.status exitCode = jobInfo.exitcode sub_time = jobInfo.submission_time.__str__() completed= jobInfo.completion_time.__str__() # cpu_time = jobInfo.used_cpu_time.__str__() # wall_time= jobInfo.used_wall_time.__str__() except arclib.FTPControlError: logger.error('Failed to query job %s' % jobName) status = 'REMOVED' exitCode = -1 completed = None cpu_time = None wall_time = None self.__unlockArclib()