def getCurrentJobStatus(self, job):
    """
    Set a job to the most current known status.

    This method first checks whether the job has a previous pass/fail status. If so, it
    sets that status without performing any third-party queueing commands. If the job has
    a status of queued, derived methods are called to determine the current status as the
    third-party queueing system sees it.
    """
    tester = job.getTester()
    json_status = self.getData(job.getUniqueIdentifier(),
                               status_bucket=True,
                               caveat_message=True,
                               std_out=True,
                               working_dir=True)

    if json_status:
        bucket = self.__createStatusBucket(json_status)

    # This test was not included during the launch sequence
    else:
        with self.dag_lock:
            tester.setStatus('NO LAUNCH INFORMATION', tester.bucket_silent)
        return

    with self.dag_lock:
        tester.setStatus(json_status['caveat_message'], bucket)

    job_information = {}

    # The job was queued previously, and we want to know if the status has changed.
    if tester.isQueued():
        output = self.executeAndGetJobOutput(job)
        job_information = self.handleJobStatus(job, output)

    # Some other finished status. Check the conditionals in case the user wants to re-open stdout.
    elif self._readJobOutput(job):
        json_job = self.getData(job.getUniqueIdentifier(), std_out=True, working_dir=True)

        if json_job['std_out']:
            stdout_file = os.path.join(json_job['working_dir'], json_job['std_out'])

            if os.path.exists(stdout_file):
                with open(stdout_file, 'r') as f:
                    # We can use trimmed output here, now that the job has a proper
                    # status (we are not going to run processResults again).
                    outfile = util.readOutput(f, self.options)
                    job.setOutput(outfile)
            else:
                with self.dag_lock:
                    tester.setStatus('NO STDOUT FILE', tester.bucket_fail)

    # Update session storage with possibly new job status information
    self.saveSession(job, **job_information)
def runCommand(self, cmd, cwd, timer, options):
    """
    Helper method for running external (sub)processes as part of the tester's execution.
    This uses the tester's getCommand and getTestDir methods to run a subprocess. The timer
    must be the same timer passed to the run method. Results from running the subprocess
    are stored in the tester's output and exit_code fields.
    """
    cmd = self.getCommand(options)
    cwd = self.getTestDir()

    self.process = None
    try:
        f = TemporaryFile()
        e = TemporaryFile()

        # On Windows, there is an issue with path translation when the command is passed
        # in as a list.
        if platform.system() == "Windows":
            process = subprocess.Popen(cmd,
                                       stdout=f,
                                       stderr=e,
                                       close_fds=False,
                                       shell=True,
                                       creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
                                       cwd=cwd)
        else:
            process = subprocess.Popen(cmd,
                                       stdout=f,
                                       stderr=e,
                                       close_fds=False,
                                       shell=True,
                                       preexec_fn=os.setsid,
                                       cwd=cwd)
    except:
        print("Error in launching a new task", cmd)
        raise

    self.process = process
    self.outfile = f
    self.errfile = e

    timer.start()
    process.wait()
    timer.stop()

    self.exit_code = process.poll()
    self.outfile.flush()
    self.errfile.flush()

    # Store the contents of the output, and close the files
    self.joined_out = util.readOutput(self.outfile, self.errfile)
    self.outfile.close()
    self.errfile.close()
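# NOTE: runCommand() immediately overwrites its cmd and cwd parameters with
# self.getCommand(options) and self.getTestDir(), so whatever the caller passes for those
# two arguments is effectively unused. The call site below is a hypothetical sketch only;
# the run() signature shown here is an assumption, not the tester's documented interface.
def run(self, timer, options):
    # cmd/cwd are recomputed inside runCommand, so placeholders are fine here.
    self.runCommand(None, None, timer, options)

    # After the subprocess finishes, the results live on the tester itself.
    if self.exit_code != 0:
        print('Command failed with exit code %d' % self.exit_code)
    print(self.joined_out)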
def getCurrentJobStatus(self, job):
    """
    Set a job to the most current known status.

    This method first checks whether the job has a previous pass/fail status. If so, it
    sets that status without performing any third-party queueing commands. If the job has
    a status of queued, derived methods are called to determine the current status as the
    third-party queueing system sees it.
    """
    tester = job.getTester()
    json_status = self.getData(job.getUniqueIdentifier(),
                               status_bucket=True,
                               caveat_message=True,
                               std_out=True,
                               working_dir=True)

    if json_status:
        bucket = self.__createStatusBucket(json_status)

    # This test was not included during the launch sequence
    else:
        with self.dag_lock:
            tester.setStatus('NO LAUNCH INFORMATION', tester.bucket_silent)
        return

    with self.dag_lock:
        tester.setStatus(json_status['caveat_message'], bucket)

    job_information = {}

    # The job was queued previously, and we want to know if the status has changed.
    if tester.isQueued():
        output = self.executeAndGetJobOutput(job)
        job_information = self.handleJobStatus(job, output)

    # Some other finished status. Check the conditionals in case the user wants to re-open stdout.
    elif self._readJobOutput(job):
        json_job = self.getData(job.getUniqueIdentifier(), std_out=True, working_dir=True)

        if json_job['std_out']:
            stdout_file = os.path.join(json_job['working_dir'], json_job['std_out'])

            if os.path.exists(stdout_file):
                with open(stdout_file, 'r') as f:
                    # We can use trimmed output here, now that the job has a proper
                    # status (we are not going to run processResults again).
                    outfile = util.readOutput(f, None, self.options)
                    job.setOutput(outfile)
            else:
                with self.dag_lock:
                    tester.setStatus('NO STDOUT FILE', tester.bucket_fail)

    # Update session storage with possibly new job status information
    self.saveSession(job, **job_information)
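# NOTE: getData() appears to pull a per-job record out of the queue manager's session
# storage. The dict below is a hypothetical sketch of what such a record might look like,
# assuming the record is flat and keyed by the keyword arguments requested above; the
# values (and the bucket name) are illustrative, not taken from a real session file.
example_json_status = {
    'status_bucket': 'bucket_success',            # name used by __createStatusBucket (assumed)
    'caveat_message': 'FINISHED',                 # message previously stored for this job (assumed)
    'std_out': 'job_name.o12345',                 # stdout file written during the launch (assumed)
    'working_dir': '/path/to/the/test/directory', # where the job ran (assumed)
}
# getCurrentJobStatus() rebuilds a status bucket from 'status_bucket' and re-applies
# 'caveat_message' under the DAG lock; if no record exists at all, the test is marked
# 'NO LAUNCH INFORMATION' and skipped.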
def hasTimedOutOrFailed(self, job_data):
    """ Use qstat and return a bool indicating job failures outside of the TestHarness's control. """
    launch_id = job_data.json_data.get(job_data.job_dir,
                                       {}).get(job_data.plugin,
                                               {}).get('ID', "").split('.')[0]

    # We shouldn't run into a null launch ID, but just in case, let's handle it
    if launch_id:
        qstat_command_result = util.runCommand('qstat -xf %s' % (launch_id))

        # Handle a qstat execution failure, whatever the reason
        if qstat_command_result.find('ERROR') != -1:
            # Set the error for each job contained in the group
            for job in job_data.jobs.getJobs():
                job.setOutput('ERROR invoking `qstat`\n%s' % (qstat_command_result))
                job.setStatus(job.error, 'QSTAT')
            return True

        qstat_job_result = re.findall(r'Exit_status = (\d+)', qstat_command_result)

        # Whoops. This job was killed by PBS for exceeding its walltime
        if qstat_job_result and qstat_job_result[0] == "271":
            for job in job_data.jobs.getJobs():
                job.addCaveats('Killed by PBS Exceeded Walltime')
            return True

        # Capture TestHarness exceptions
        elif qstat_job_result and qstat_job_result[0] != "0":
            # Try to gather some useful output we can tack onto one of the job objects
            output_file = job_data.json_data.get(job_data.job_dir,
                                                 {}).get(job_data.plugin,
                                                         {}).get('QSUB_OUTPUT', "")
            if os.path.exists(output_file):
                with open(output_file, 'r') as f:
                    output_string = util.readOutput(f, None, job_data.jobs.getJobs()[0].getTester())
                    job_data.jobs.getJobs()[0].setOutput(output_string)

            # Add a caveat to each job, explaining that one of the jobs caused a TestHarness exception
            for job in job_data.jobs.getJobs():
                job.addCaveats('TESTHARNESS EXCEPTION')
            return True
def runCommand(self, cmd, cwd, timer, options):
    """
    Helper method for running external (sub)processes as part of the tester's execution.
    This uses the tester's getCommand and getTestDir methods to run a subprocess. The timer
    must be the same timer passed to the run method. Results from running the subprocess
    are stored in the tester's output and exit_code fields.
    """
    cmd = self.getCommand(options)
    cwd = self.getTestDir()

    self.process = None
    try:
        f = TemporaryFile()
        e = TemporaryFile()

        # On Windows, there is an issue with path translation when the command is passed
        # in as a list.
        if platform.system() == "Windows":
            process = subprocess.Popen(cmd,
                                       stdout=f,
                                       stderr=e,
                                       close_fds=False,
                                       shell=True,
                                       creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
                                       cwd=cwd)
        else:
            process = subprocess.Popen(cmd,
                                       stdout=f,
                                       stderr=e,
                                       close_fds=False,
                                       shell=True,
                                       preexec_fn=os.setsid,
                                       cwd=cwd)
    except:
        print("Error in launching a new task", cmd)
        raise

    self.process = process
    self.outfile = f
    self.errfile = e

    timer.start()
    process.wait()
    timer.stop()

    self.exit_code = process.poll()
    self.outfile.flush()
    self.errfile.flush()

    # Store the contents of the output, and close the files
    self.joined_out = util.readOutput(self.outfile, self.errfile, options)
    self.outfile.close()
    self.errfile.close()
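# NOTE: Both Popen branches above place the shell (and anything it spawns) into its own
# process group: preexec_fn=os.setsid on POSIX, CREATE_NEW_PROCESS_GROUP on Windows. One
# practical benefit is that the whole process tree can be signaled at once. The helper
# below is an illustrative sketch of that idea only; it is not part of the tester API.
import os
import platform
import signal

def terminate_process_group(process):
    """ Signal everything in the process group created for `process` (illustrative). """
    if platform.system() == "Windows":
        # CREATE_NEW_PROCESS_GROUP makes the child the root of a new group, which can be
        # interrupted with CTRL_BREAK_EVENT.
        process.send_signal(signal.CTRL_BREAK_EVENT)
    else:
        # preexec_fn=os.setsid made the child a session (and group) leader, so killpg on
        # its group id reaches the whole tree.
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)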
def hasTimedOutOrFailed(self, job_data):
    """ Use qstat and return a bool indicating job failures outside of the TestHarness's control. """
    launch_id = job_data.json_data.get(job_data.job_dir,
                                       {}).get(job_data.plugin,
                                               {}).get('ID', "").split('.')[0]

    # We shouldn't run into a null launch ID, but just in case, let's handle it
    if launch_id:
        qstat_command_result = util.runCommand('qstat -xf %s' % (launch_id))

        # Handle a qstat execution failure, whatever the reason
        if qstat_command_result.find('ERROR') != -1:
            # Set the error for each job contained in the group
            for job in job_data.jobs.getJobs():
                job.setOutput('ERROR invoking `qstat`\n%s' % (qstat_command_result))
                job.setStatus(job.error, 'QSTAT')
            return True

        qstat_job_result = re.findall(r'Exit_status = (\d+)', qstat_command_result)

        # Whoops. This job was killed by PBS for exceeding its walltime
        if qstat_job_result and qstat_job_result[0] == "271":
            for job in job_data.jobs.getJobs():
                job.addCaveats('Killed by PBS Exceeded Walltime')
            return True

        # Capture TestHarness exceptions
        elif qstat_job_result and qstat_job_result[0] != "0":
            # Try to gather some useful output we can tack onto one of the job objects
            output_file = job_data.json_data.get(job_data.job_dir,
                                                 {}).get(job_data.plugin,
                                                         {}).get('QSUB_OUTPUT', "")
            if os.path.exists(output_file):
                with open(output_file, 'r') as f:
                    output_string = util.readOutput(f, None, self.options)
                    job_data.jobs.getJobs()[0].setOutput(output_string)

            # Add a caveat to each job, explaining that one of the jobs caused a TestHarness exception
            for job in job_data.jobs.getJobs():
                job.addCaveats('TESTHARNESS EXCEPTION')
            return True
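# NOTE: A small sketch of the Exit_status handling above, run against an invented fragment
# of `qstat -xf` output. The sample text and job id are made up; only the regular
# expression and the 271 comparison mirror the code (PBS reports 271 for a job it
# terminated, which the harness treats as exceeding walltime).
import re

sample_qstat_output = """
Job Id: 12345.sched01
    Job_Name = some_test
    job_state = F
    Exit_status = 271
"""

qstat_job_result = re.findall(r'Exit_status = (\d+)', sample_qstat_output)
print(qstat_job_result)               # ['271']
print(qstat_job_result[0] == "271")   # True -> treated as killed for exceeding walltime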