import os
from xml.dom.minidom import parse

# CRABServer, CRABServerError, BackendError, Config, ConfigParser and logger
# are assumed to be provided by the enclosing Ganga/CRAB backend module.


def master_updateMonitoringInformation(jobs):
    """Update the statuses of the given jobs by issuing crab -status."""
    logger.debug('Updating the monitoring information of %d jobs' % len(jobs))
    server = CRABServer()

    for j in jobs:
        logger.debug('Updating monitoring information for job %d (%s)' % (j.id, j.status))

        try:
            server.status(j)
        except Exception:
            logger.error('Get status for job %d failed, skipping.' % j.id)
            continue

        doc_path = '%s/share/%s' % (j.inputdata.ui_working_dir, j.inputdata.xml_report)
        if os.path.isfile(doc_path):
            logger.debug('Found XML report for the job %d' % j.id)
            jobDoc = parse(doc_path).getElementsByTagName('RunningJob')
            for subjobDoc in jobDoc:
                # CRAB numbers subjobs from 1; the subjobs list is 0-indexed.
                index = subjobDoc.getAttribute('jobId')
                j.subjobs[int(index) - 1].backend.checkReport(subjobDoc)
                j.subjobs[int(index) - 1].backend.checkStatus()
            j.updateMasterJobStatus()
        else:
            logger.info('No results.xml for %s' % j.id)

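# A minimal sketch of the XML status report consumed above, based only on the
# getElementsByTagName('RunningJob') and getAttribute('jobId') calls made in
# master_updateMonitoringInformation; the enclosing element and any other
# attributes are illustrative assumptions, not the actual CRAB schema:
#
#   <Task>
#     <RunningJob jobId="1" ... />
#     <RunningJob jobId="2" ... />
#   </Task>
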
def master_kill(self):
    """Kill a master job and all of its subjobs through CRAB."""
    job = self.getJobObject()
    server = CRABServer()

    try:
        server.kill(job)
    except Exception:
        logger.warning('Killing the job using CRAB failed.')
        return 1

    if len(job.subjobs):
        for s in job.subjobs:
            if s.status not in ['completed', 'failed']:
                s.updateStatus('killed')
    else:
        if job.status not in ['completed', 'failed']:
            job.updateStatus('killed')

    job.updateMasterJobStatus()

    try:
        server.status(job)
    except Exception:
        logger.warning('Get job status from CRAB failed. The job may not have been killed.')

    return 1

def postMortem(self, job):
    """Retrieve post-mortem information for a failed job."""
    logger.info('postMortem')
    server = CRABServer()

    try:
        server.postMortem(job)
    except Exception:
        logger.warning('Post-mortem retrieval with CRAB failed.')

    job.updateMasterJobStatus()
    return 1

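# For orientation, the CRABServer wrapper used throughout these methods is
# assumed (from the calls made in this module) to expose at least the
# following interface; this is a sketch inferred from usage, not the actual
# class definition:
#
#   class CRABServer:
#       def status(self, job): ...      # wraps crab -status
#       def kill(self, job): ...        # wraps crab -kill
#       def postMortem(self, job): ...  # wraps crab -postMortem
#       def resubmit(self, job): ...    # wraps crab -resubmit
#       def getOutput(self, job): ...   # wraps crab -getoutput
#
# master_resubmit below catches CRABServerError, so at least resubmit() is
# expected to raise it on failure.
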
def master_resubmit(self, rjobs):
    """Resubmit all the jobs in a jobset."""
    if rjobs[0]:
        job = rjobs[0].master
        for subjob in job.subjobs:
            subjob.updateStatus('submitting')

        try:
            CRABServer().resubmit(job)
        except CRABServerError:
            logger.error('Resubmission through CRAB failed.')
            for subjob in job.subjobs:
                subjob.rollbackToNewState()
            job.updateMasterJobStatus()
            logger.info('All subjobs have been reverted to "new".')
            return False

        # This will perform a crab -status and parse the XML.
        self.master_updateMonitoringInformation((job, ))

        # Force all the subjobs to 'submitted' so that the monitoring loop
        # keeps issuing status calls to update them.
        for subjob in job.subjobs:
            if subjob.status == 'submitting':
                subjob.updateStatus('submitted')
        job.updateMasterJobStatus()
    else:
        logger.warning('Not resubmitting a job without subjobs.')

    return True

def parseResults(self):
    """Parse the CRAB framework job report (FJR) and update the job status and metrics."""
    job = self.getJobObject()
    server = CRABServer()

    try:
        server.status(job)
        server.getOutput(job)
    except Exception:
        logger.error('Could not get the output of the job.')
        # Let's not raise this yet (in case of a double call).
        # raise CRABServerError('Impossible to get the output of the job')

    workdir = job.inputdata.ui_working_dir
    index = int(job.id) + 1
    doc_path = '%s/res/crab_fjr_%d.xml' % (workdir, index)

    if not os.path.exists(doc_path):
        logger.error('FJR %s not found.' % doc_path)
        return

    try:
        doc = parse(doc_path)
    except Exception:
        logger.error('Could not parse document. File not present?')
        return

    status = doc.firstChild.getAttribute('Status')
    if status in ['Failed']:
        self.postMortem(job)
        job.updateStatus('failed')
    elif status in ['Success']:
        if job.status == 'submitting':
            job.updateStatus('submitted')
        job.updateStatus('completed')
    else:
        logger.warning('UNKNOWN PARSE STATUS: ' + str(status))

    config = Config.getConfig('Metrics')
    location = config['location']
    if not os.path.exists(location):
        raise BackendError('CRAB', 'Metrics file %s does not exist.' % location)

    config = ConfigParser()
    config.read(location)

    # Iterate over all the sections except 'report'.
    SECTIONS = config.sections()
    if 'report' in SECTIONS:
        SECTIONS.remove('report')

    # Only five sections work here...
    for section in SECTIONS:
        if section not in job.backend.fjr:
            job.backend.fjr[section] = {}

        performancereport = doc.getElementsByTagName('PerformanceReport')[0]
        performancesummary = performancereport.getElementsByTagName('PerformanceSummary')

        for pfs in performancesummary:
            if pfs.getAttribute('Metric') == section:
                metrics = pfs.getElementsByTagName('Metric')
                for metric in metrics:
                    name = metric.getAttribute('Name')
                    if config.has_option(section, name):
                        # Metric names use minuses instead of underscores, so
                        # map them through the config file before sending them
                        # to the DB.
                        name = config.get(section, name)
                        if name:
                            job.backend.fjr[section][name] = metric.getAttribute('Value')

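# A minimal sketch of the Metrics config file read above, inferred from the
# ConfigParser calls: each section (besides 'report') maps FJR metric names,
# which use minuses, to the underscore-style names stored in the DB. The
# section and option names here are illustrative assumptions:
#
#   [SystemMemory]
#   peak-rss = peak_rss
#   peak-vsize = peak_vsize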