Ejemplo n.º 1
0
    def _start_job(self, cmd, cluster_queue=None, verbose=1):
        '''Launch the test command.  Requires directory lock.

        IMPORTANT: call self.start_job, not self._start_job, when using
        multiple threads; the former is the decorated form of this method,
        which acquires the directory lock and enters self.path first.

        :param cmd: command to run: executed through the shell locally, or
            substituted into the submit template for a cluster queue.
        :param cluster_queue: queueing system to submit to; if false the
            command is run locally instead.
        :param verbose: progress messages are printed if greater than 2.
        :returns: a subprocess.Popen object (local run) or a
            queues.ClusterQueueJob object (cluster run).  Both have a wait
            method which returns only once the job has finished.
        :raises exceptions.RunError: if the local command cannot be executed.
        '''
        if not cluster_queue:
            # No queueing system requested: run through a local shell.
            if verbose > 2:
                print(('Running test using %s in %s\n' % (cmd, self.path)))
            try:
                return subprocess.Popen(cmd, shell=True)
            except OSError:
                # sys.exc_info() is used instead of 'except ... as ...' so
                # the same source works on python 2.5 and python 2.6/3.
                reason = sys.exc_info()[1]
                raise exceptions.RunError(
                    'Execution of test failed: %s' % (reason,))

        # Fill in the submit template and hand the result to the queue.
        program = self.test_program
        template_name = os.path.basename(self.submit_template)
        queued = queues.ClusterQueueJob(
            '%s.%s' % (template_name, program.test_id), system=cluster_queue)
        queued.create_submit_file(program.submit_pattern, cmd,
                                  self.submit_template)
        if verbose > 2:
            print(('Submitting tests using %s (template submit file) in %s'
                       % (self.submit_template, self.path)))
        queued.start_job()
        return queued
Ejemplo n.º 2
0
 def wait(self, poll_interval=15):
     '''Returns when job has finished running on the cluster.

     The queue is polled by running self.queue_cmd and scanning its output
     for a row whose job id column equals self.job_id.  The job counts as
     finished once that row reports self.finished_status or once no such
     row is present.

     :param poll_interval: seconds to sleep before each poll of the queue.
     :raises exceptions.RunError: if the queue command exits with a
         non-zero status.
     '''
     # Don't ask the queueing system for the job itself but rather parse the
     # output from all current jobs and look for the job in question.
     # This works around the problem where the job_id is not a sufficient
     # handle to query the system directly (e.g. on the CMTH cluster).
     qstat_cmd = [self.queue_cmd]
     # Rows too short to hold both columns (blank lines, rulers, ...) cannot
     # describe a job and must not be indexed.
     min_columns = max(self.job_id_column, self.status_column) + 1
     running = True
     while running:
         time.sleep(poll_interval)
         qstat_popen = subprocess.Popen(qstat_cmd,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
         # communicate() drains both pipes while waiting for the process;
         # calling wait() first can deadlock once a pipe buffer fills up.
         (qstat_out, qstat_err) = qstat_popen.communicate()
         if qstat_popen.returncode != 0:
             detail = qstat_err or qstat_out
             err = ('Error inspecting queue system: %s' %
                    (detail.decode('utf-8', 'replace').strip(), ))
             raise exceptions.RunError(err)
         # The pipes yield bytes on python 3 whereas self.job_id is decoded
         # text (see start_job), so decode before comparing fields.
         qstat_out = qstat_out.decode('utf-8', 'replace')
         # Assume job has finished unless it appears in the qstat output.
         running = False
         for line in qstat_out.splitlines():
             words = line.split()
             if len(words) < min_columns:
                 continue
             if words[self.job_id_column] == self.job_id:
                 running = words[self.status_column] != self.finished_status
                 break
Ejemplo n.º 3
0
 def start_job(self):
     '''Submit job to cluster queue.

     Runs self.submit_cmd on self.submit_file and stores the command's
     output (stdout and stderr combined, stripped and decoded as UTF-8) in
     self.job_id.

     :raises exceptions.RunError: if the submit command cannot be executed
         or exits with a non-zero status.
     '''
     submit_cmd = [self.submit_cmd, self.submit_file]
     try:
         submit_popen = subprocess.Popen(submit_cmd,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.STDOUT)
         # communicate() reads the pipe to EOF and then reaps the process;
         # calling wait() first can deadlock if the pipe buffer fills up.
         submit_out = submit_popen.communicate()[0]
     except OSError:
         # 'odd' syntax so exceptions work with python 2.5 and python 2.6/3.
         err = 'Error submitting job: %s' % (sys.exc_info()[1], )
         raise exceptions.RunError(err)
     submit_out = submit_out.strip().decode('utf-8')
     if submit_popen.returncode != 0:
         # A failed submission only produces an error message; storing it
         # as the job id would make the job look finished when polled.
         err = 'Error submitting job: %s' % (submit_out, )
         raise exceptions.RunError(err)
     self.job_id = submit_out
Ejemplo n.º 4
0
 def __init__(self, submit_file, system='PBS'):
     '''Record the submit file and the settings of the queueing system.

     :param submit_file: stored as self.submit_file.
     :param system: name of the queueing system; only 'PBS' is implemented.
     :raises exceptions.RunError: if system is anything other than 'PBS'.
     '''
     # No job id until a job has been submitted.
     self.job_id = None
     self.submit_file = submit_file
     self.system = system
     if self.system != 'PBS':
         raise exceptions.RunError(
             'Queueing system not implemented: %s' % self.system)
     # PBS: commands used to submit a job and to list the queue.
     (self.submit_cmd, self.queue_cmd) = ('qsub', 'qstat')
     # PBS: columns of the queue listing holding the job id and job status,
     # and the status value which marks a job as finished.
     (self.job_id_column, self.status_column) = (0, 4)
     self.finished_status = 'C'
Ejemplo n.º 5
0
    def create_submit_file(self, pattern, string, template):
        '''Create a submit file.

        Replace pattern in the template file with string and place the result
        in self.submit_file.

        :param string pattern: string in template to be replaced.
        :param string string: string to replace pattern in template.
        :param string template: filename of file containing the template
            submit script.
        :raises exceptions.RunError: if the template file does not exist.
        '''
        # get template
        if not os.path.exists(template):
            err = 'Submit file template does not exist: %s.' % (template, )
            raise exceptions.RunError(err)
        # try/finally (rather than 'with', which needs a __future__ import on
        # python 2.5) so the handles are closed even if the read/write fails.
        ftemplate = open(template)
        try:
            submit = ftemplate.read()
        finally:
            ftemplate.close()
        # replace marker with our commands
        submit = submit.replace(pattern, string)
        # write to submit script
        fsubmit = open(self.submit_file, 'w')
        try:
            fsubmit.write(submit)
        finally:
            fsubmit.close()
Ejemplo n.º 6
0
    def _move_output_to_test_output(self, test_files_out):
        '''Move the program's output file to the testcode output file.

        Requires directory lock.

        The single file matching self.output (which may be a glob pattern) is
        moved to test_files_out.

        IMPORTANT: use self.move_output_to_test_output rather than
        self._move_output_to_test_output if using multiple threads; it is the
        decorated form of this method, which acquires the directory lock and
        enters self.path.

        :param test_files_out: destination of the output file.
        :raises exceptions.RunError: unless self.output matches exactly one
            file.
        '''
        # A shell command such as
        #   mv self.output test_files[ind]
        # only works for a glob if it expands to a single file.  Insist on
        # the same here so that tests run locally behave like tests run
        # through the queueing system.
        matches = glob.glob(self.output)
        n_matches = len(matches)
        if n_matches != 1:
            raise exceptions.RunError(
                'Output pattern (%s) matches %s files (%s).' %
                (self.output, n_matches, matches))
        (produced, ) = matches
        shutil.move(produced, test_files_out)
Ejemplo n.º 7
0
    def run_test(self, verbose=1, cluster_queue=None, rundir=None):
        '''Run all jobs in test.

        One command is built for each (input, argument) pair in
        self.inputs_args.  Locally the commands are run one at a time and
        each is analysed as soon as it finishes; with a cluster queue they
        are joined into a single submission and analysed together once it
        has completed.

        A RunError at any stage marks the current test as failed and every
        test whose entry in self.status is still false as skipped.

        :param verbose: verbosity level; also forwarded to the helper methods
            called here.
        :param cluster_queue: queueing system passed to self.start_job; if
            false the tests are run locally.
        :param rundir: passed through to self.verify_job and util.info_line.
        '''

        try:
            # Construct tests.
            test_cmds = []
            test_files = []
            for (test_input, test_arg) in self.inputs_args:
                if (test_input and not os.path.exists(
                        os.path.join(self.path, test_input))):
                    err = 'Input file does not exist: %s' % (test_input, )
                    raise exceptions.RunError(err)
                test_cmds.append(
                    self.test_program.run_cmd(test_input, test_arg,
                                              self.nprocs))
                test_files.append(
                    util.testcode_filename(FILESTEM['test'],
                                           self.test_program.test_id,
                                           test_input, test_arg))

            # Move files matching output pattern out of the way.
            self.move_old_output_files(verbose)

            # Run tests one-at-a-time locally or submit job in single submit
            # file to a queueing system.
            if cluster_queue:
                if self.output:
                    for (ind, test) in enumerate(test_cmds):
                        # Don't quote self.output if it contains any wildcards
                        # (assume the user set it up correctly!)
                        # NOTE(review): the pipes module is deprecated (PEP
                        # 594); shlex.quote is the python 3 equivalent.
                        out = self.output
                        if not compat.compat_any(
                                wild in self.output
                                for wild in ['*', '?', '[', '{']):
                            out = pipes.quote(self.output)
                        test_cmds[ind] = '%s; mv %s %s' % (
                            test_cmds[ind], out, pipes.quote(test_files[ind]))
                # All commands go into a single submit file.
                test_cmds = ['\n'.join(test_cmds)]
            for (ind, test) in enumerate(test_cmds):
                if not cluster_queue:
                    # Bind the test being run *before* starting it, so that
                    # a RunError raised whilst starting the job is attributed
                    # to this test rather than to whichever test these names
                    # were last bound to.
                    (test_input, test_arg) = self.inputs_args[ind]
                job = self.start_job(test, cluster_queue, verbose)
                job.wait()
                # Analyse tests as they finish.
                if cluster_queue:
                    # Did all of them at once.
                    for (test_input, test_arg) in self.inputs_args:
                        self.verify_job(test_input, test_arg, verbose, rundir)
                else:
                    # Did one job at a time.
                    err = []
                    if self.output:
                        try:
                            self.move_output_to_test_output(test_files[ind])
                        except exceptions.RunError:
                            # Keep going: a non-zero return code (below) is
                            # reported in preference to this error.
                            err.append(sys.exc_info()[1])
                    status = validation.Status()
                    if job.returncode != 0:
                        err.insert(
                            0, 'Error running job.  Return code: %i' %
                            job.returncode)
                        # The job failed; skip_job decides whether the test
                        # is to be reported as skipped.
                        (status, msg) = self.skip_job(test_input, test_arg,
                                                      verbose)
                    if status.skipped():
                        self._update_status(status, (test_input, test_arg))
                        if verbose > 0 and verbose < 3:
                            sys.stdout.write(
                                util.info_line(self.path, test_input, test_arg,
                                               rundir))
                        status.print_status(msg, verbose)
                    elif err:
                        # re-raise first error we hit.
                        raise exceptions.RunError(err[0])
                    else:
                        self.verify_job(test_input, test_arg, verbose, rundir)
        except exceptions.RunError:
            # (test_input, test_arg) refer to the test being handled when the
            # error was raised; record it as failed.
            err = sys.exc_info()[1]
            if verbose > 2:
                err = 'Test(s) in %s failed.\n%s' % (self.path, err)
            status = validation.Status([False])
            self._update_status(status, (test_input, test_arg))
            if verbose > 0 and verbose < 3:
                info_line = util.info_line(self.path, test_input, test_arg,
                                           rundir)
                sys.stdout.write(info_line)
            status.print_status(err, verbose)
            # Shouldn't run remaining tests after such a catastrophic failure.
            # Mark all remaining tests as skipped so the user knows that they
            # weren't run.
            err = 'Previous test in %s caused a system failure.' % (self.path)
            status = validation.Status(name='skipped')
            for ((test_input, test_arg), stat) in self.status.items():
                if not self.status[(test_input, test_arg)]:
                    self._update_status(status, (test_input, test_arg))
                    if verbose > 2:
                        cmd = self.test_program.run_cmd(
                            test_input, test_arg, self.nprocs)
                        print('Test using %s in %s' % (cmd, self.path))
                    elif verbose > 0:
                        info_line = util.info_line(self.path, test_input,
                                                   test_arg, rundir)
                        sys.stdout.write(info_line)
                    status.print_status(err, verbose)