Ejemplo n.º 1
0
    def test_pipeline(self):
        """Run a four-stage pipeline and check the files of each program.

        The job manager is told to start every job on the first poll and to
        finish them five polls later; the test fails if the pipeline has not
        completed after 40 polls of 0.2 seconds each.
        """
        stages = (
            ('A', 'ls -l ${file}.txt > @{file}.ls'),
            ('B', 'wc ${file}.ls > @{file}.c'),
            ('C', 'ls -l ${file}.txt ${file}.ls ${file}.c > @{file}.out'),
            ('D', 'wc ${file}.out > @{file}.out'),
        )
        self.setup_pipeline(*stages)
        result = self.pipeline.run(
            "pipe", output_dir=self.dir, file=self.filename)

        polls_left = 40
        while not result.update_all():
            if polls_left == 40:
                # First poll: start every queued job.
                get_job_manager().run_all()
            elif polls_left == 35:
                # Five polls later: mark every job as finished.
                get_job_manager().finish_all()
            time.sleep(0.2)
            polls_left -= 1
            self.assertTrue(polls_left > 0, "Pipeline test timed out.")

        programs = list(result.programs.all())
        self.assertEqual(len(programs), 4)
        stage_a, stage_b, _stage_c, stage_d = programs

        # Swap the last four characters of the input name (presumably its
        # ".txt" extension) for each stage's own extension.
        template = self.filename[:-4] + ".%s"
        ls_file = template % "ls"
        c_file = template % "c"
        out_file = template % "out"

        self.assertProgram(stage_a, self.filename, ls_file)
        self.assertProgram(stage_b, ls_file, c_file)
        # The check for stage C (three inputs) is currently disabled:
        # self.assertProgram(
        #     _stage_c, [ls_file, self.filename, c_file], out_file)
        self.assertProgram(stage_d, out_file, out_file)
Ejemplo n.º 2
0
 def test_duration(self):
     """Check that a running job's duration advances one second at a time.

     Requires the fake job manager; the reported duration is sampled five
     times, one second apart, and must read 1, 2, 3, 4 and 5 seconds.
     """
     manager_name = type(get_job_manager()).__name__
     self.assertEqual(manager_name, 'FakeJobManager')

     self.setup_pipeline(('DUR', 'sleep 5'))
     pipe_run = self.pipeline.run("pipe", output_dir=self.dir)
     program = pipe_run.programs.get()
     get_job_manager().run_all()

     for expected_seconds in range(1, 6):
         program.update_status()
         self.assertEqual(int(program.duration), expected_seconds)
         time.sleep(1)
Ejemplo n.º 3
0
 def setUp(self):
     """Create a scratch directory and the job manager under test.

     The test is skipped when the configured manager class reports that
     it is not enabled.
     """
     super(ManagerTestBase, self).setUp()

     scratch_dir = tempfile.mkdtemp(suffix='chore-tests')
     self.tempdir = scratch_dir
     self.manager = get_job_manager(
         self.manager_cls, scratch_dir, self.batched)

     enabled = self.manager.is_enabled()
     if not enabled:
         reason = "Manager {} is not enabled".format(self.manager_cls)
         self.skipTest(reason)

     # NOTE(review): tempfile.mktemp() only returns a path and does not
     # create the file; it is deprecated because another process can claim
     # the name first. Kept because the tests may rely on the file not
     # existing yet -- confirm before switching to mkstemp().
     self.filename = tempfile.mktemp(prefix='test-job-')
Ejemplo n.º 4
0
    def update_status(self, commit=True):
        """Take data from the job manager and populate the database.

        Only submitted, not-yet-complete runs are refreshed. A run whose job
        can no longer be found more than an hour after submission is marked
        as a completed error.

        Args:
            commit: When true, save the model after the refresh.

        Returns:
            The value of ``self.is_complete`` after the update.
        """
        job_manager = get_job_manager()
        if self.is_submitted and not self.is_complete:
            # Treat a missing status (None) like an empty one so the lookups
            # below cannot fail on a non-dict value.
            data = job_manager.status(self.job_id, clean=False) or {}
            age = now() - self.submitted

            if not data and age > timedelta(hours=1):
                # This usually means the job is so old that it's gone from
                # the job manager queue and we have no further information
                # about it.
                self.is_complete = True
                self.is_error = True
                self.error_text = "Job Disapeared from Job Queue"
                # Persist the terminal state and report completion, like
                # every other exit path of this method.
                if commit:
                    self.save()
                return self.is_complete

            final_duration_known = False
            if data.get('status', 'notfound') == 'finished':
                if data['finished'] and data['started']:
                    dur = data['finished'] - data['started']
                    # Round up any microseconds so a short job is non-zero.
                    self.duration = dur.total_seconds() + int(
                        dur.microseconds > 0)
                    final_duration_known = True
                self.completed = data['finished']
                self.is_complete = True
                self.is_error = data['return'] != 0
                if data['error']:
                    # Limit errors to 10k
                    self.error_text = data['error'][:10240]
                self.input_size = self.update_size(*self.input_fn) / 1024.0
                self.output_size = self.update_size(*self.output_fn) / 1024.0

            if data.get('started', None) is not None:
                if not self.is_started:
                    self.is_started = True
                    self.started = data['started']
                if not final_duration_known:
                    # Save the duration so far. Skipped once the final
                    # duration is known, otherwise the real run time would
                    # be replaced by the time elapsed until this poll.
                    dur = now() - data['started']
                    # Round up any microseconds, useful for testing
                    # non-zero time.
                    self.duration = dur.total_seconds() + int(
                        dur.microseconds > 0)

            if data and self.previous_id:
                # A failed predecessor means this job must not run either.
                for prev in ProgramRun.objects.filter(job_id=self.previous_id):
                    if prev.is_error:
                        job_manager.stop(self.job_id)
                        self.is_error = True

            if commit:
                self.save()

        # We're going to force an error out of hiding: a completed run whose
        # error text is the literal string 'None' gets the error re-read.
        # NOTE(review): this branch saves regardless of ``commit``.
        if self.is_complete and self.error_text == 'None':
            (_, error) = job_manager.job_read(self.job_id, 'err')
            if error is not None:
                self.error_text = "Broken JobID error: " + error
            else:
                self.error_text = "Lost error for {}".format(self.job_id)
            self.is_error = True
            self.save()

        return self.is_complete
Ejemplo n.º 5
0
 def stop(self, msg='Stopped'):
     """Stop this program from running.

     A run that was never submitted, or that is already complete, is left
     untouched and True is returned. Otherwise the job manager is asked to
     stop the job, the run is saved as a completed error carrying ``msg``
     as its error text, and the job manager's result is returned.
     """
     if not self.is_submitted or self.is_complete:
         return True

     outcome = get_job_manager().stop(self.job_id)
     self.is_error = True
     self.is_complete = True
     self.error_text = msg
     self.save()
     return outcome
Ejemplo n.º 6
0
 def get_context_data(self, **kw):
     """Build the template context for the job listing.

     Reads the optional ``user`` filter and the comma-separated ``cols``
     list from the query string, then asks the job manager for the status
     of its jobs and converts every entry with ``get_item``.
     """
     context = super(JobViewer, self).get_context_data(**kw)
     manager = get_job_manager()
     context['pipeline'] = manager

     params = self.request.GET
     filters = {}
     if 'user' in params:
         filters['user'] = params['user']

     # Drop the empty entries produced by a blank or trailing comma.
     cols = list(filter(None, params.get('cols', '').split(',')))

     rows = []
     for item in manager.jobs_status(*cols, **filters):
         rows.append(self.get_item(item, cols))

     context['object_list'] = rows
     context['pipeline_name'] = type(manager).__name__
     context['cols'] = cols
     return context
Ejemplo n.º 7
0
    def _raw_status(self):
        """Ask the job manager for this job's status within a time window.

        This always uses the non-batched ``job_status`` call; switch to
        ``status`` if batch mode is needed.
        """
        if self.submitted:
            # Search from one day before to seven days after submission.
            window = {
                'start': self.submitted - timedelta(days=1),
                'end': self.submitted + timedelta(days=7),
            }
        else:
            # No submission time recorded (this is really weird): fall back
            # to a fixed, very wide window.
            window = {'start': '2016-01-01', 'end': '2040-01-01'}

        return get_job_manager().job_status(self.job_id, **window)
Ejemplo n.º 8
0
    def get_context_data(self, **kw):
        """Build the template context for the filtered job listing.

        Query-string parameters read here:
          * ``user``   -- forwarded to the job manager unchanged.
          * ``wckeys`` -- comma-separated list, empty entries dropped.
          * ``start`` / ``end`` -- forwarded as given; default to seven days
            ago and tomorrow (ISO formatted dates).
          * ``col`` (repeated) or ``cols`` (comma-separated) -- the extra
            columns to request; ``col`` wins when both are present.
        """
        context = super(JobViewer, self).get_context_data(**kw)
        manager = get_job_manager()
        context['pipeline'] = manager
        params = self.request.GET

        # Keyword arguments forwarded to the job manager's jobs_status().
        query = {}
        if 'user' in params:
            query['user'] = params['user']
        if 'wckeys' in params:
            query['wckeys'] = list(
                filter(None, params.get('wckeys', '').split(',')))

        query['start'] = (
            params['start'] if 'start' in params
            else (date.today() - timedelta(days=7)).isoformat())
        query['end'] = (
            params['end'] if 'end' in params
            else (date.today() + timedelta(days=1)).isoformat())

        if 'col' in params:
            cols = list(params.getlist('col'))
        else:
            cols = list(filter(None, params.get('cols', '').split(',')))

        # Keep only the jobs accepted by filter_item, then convert them.
        rows = []
        for item in manager.jobs_status(*cols, **query):
            if not self.filter_item(item, str(item['pid'])):
                continue
            rows.append(self.get_item(item, cols))

        context['object_list'] = rows
        context['pipeline_name'] = type(manager).__name__
        context['cols'] = cols
        context['kw'] = query
        # (column name, description) pairs exposed to the template.
        context['extra_cols'] = [
            ('User', 'Job Running User'),
            ('Account', 'O2 Account Name'),
            ('Partition', 'Server Partition'),
            ('AveRSS', 'Average resident set size'),
            ('TotalCPU', 'Total CPU'),
            ('ReqMem', 'Requested Memory'),
            ('MaxVMSize', 'Maximum Used Memory'),
            ('AveDiskRead', 'Average Disk Read'),
            ('AveDiskWrite', 'Average Disk Write'),
        ]
        return context
Ejemplo n.º 9
0
    def run(self, commit=True, **kwargs):
        """Run this pipeline run (creates ProgramRun objects).

        One ProgramRun is created (or fetched, when committing) for every
        program of the pipeline. Runs that are not yet submitted are
        submitted in order, each being told its previous and following run.
        After each run, the files it prepares are added to ``kwargs`` under
        their names so later programs receive them.

        Args:
            commit: When true, objects are fetched/created in the database
                and changes saved. When false, unsaved ProgramRun objects
                are built and collected in ``self.test_programs``.
            **kwargs: File names and options passed to each program. A
                ``clean_files`` entry is stored, newline-joined, on
                ``self.clean_files``.

        Returns:
            True when every run was handled, False as soon as one
            submission reports failure.

        Raises:
            JobSubmissionError: If an already-submitted job has finished
                with a non-zero return code.
        """
        runs = []
        if not commit:
            self.test_programs = []

        if 'clean_files' in kwargs:
            self.clean_files = '\n'.join(kwargs['clean_files'])
            if commit:
                self.save()

        for pipe in self.pipeline.programs.all():
            prepared = pipe.prepare(self.pk)
            if commit:
                run, _ = ProgramRun.objects.get_or_create(
                    piperun=self, **prepared)
            else:
                run = ProgramRun(piperun=self, **prepared)
                self.test_programs.append(run)
            runs.append(run)

        for prev, run, foll in tripplet(runs):
            if not run.is_submitted:
                if not run.submit(
                        commit=commit, previous=prev, follower=foll, **kwargs):
                    return False
            else:
                data = get_job_manager().status(run.job_id, clean=False)
                # A finished job failed when its return code is non-zero; a
                # missing return code is assumed to be a failure (default 1).
                # The comparison used to be ``!= 1``, which rejected
                # successful jobs and let jobs that failed with 1 through.
                if data.get('finished', None) and data.get('return', 1) != 0:
                    raise JobSubmissionError("Existing job already failed.")

            # Sort out the filenames for the next call in the chain
            for package, filename in run.program.prepare_files(**kwargs):
                name = package[1]
                if name not in kwargs:
                    kwargs[name] = [filename]
                elif isinstance(kwargs[name], list):
                    # Build a new list instead of appending in place so a
                    # list object passed in by the caller is never mutated.
                    kwargs[name] = kwargs[name] + [filename]
                else:
                    kwargs[name] = [kwargs[name], filename]

        return True
Ejemplo n.º 10
0
 def job_submit(self, cmd, **kwargs):
     """Hand ``cmd`` to the job manager under this run's job id.

     Any extra keyword arguments are forwarded to the manager's ``submit``.
     """
     get_job_manager().submit(self.job_id, cmd, **kwargs)
Ejemplo n.º 11
0
 def job_clean(self):
     """Remove this job's old 'out' and 'err' files via the job manager."""
     manager = get_job_manager()
     for stream in ('out', 'err'):
         manager.job_clean_fn(self.job_id, stream)
Ejemplo n.º 12
0
    def update_status(self, commit=True, force=False):
        """Take data from the job manager and populate the database.

        Only submitted runs are refreshed, and completed runs only when
        ``force`` is set. A run with no status information is left alone for
        the first day after submission and marked as a stopped error after
        that.

        Args:
            commit: When true, save the model after the refresh.
            force: When true, refresh even if the run is already complete.

        Returns:
            The value of ``self.is_complete`` after the update.
        """
        job_manager = get_job_manager()
        if self.is_submitted and (not self.is_complete or force):
            data = self._raw_status()

            if not data or 'status' not in data:
                # This usually means the job is so old that it's gone from
                # the job manager queue and we have no further information
                # about it. Give it a day before declaring it stopped.
                # NOTE(review): without a recorded submission time the age
                # cannot be computed (it used to raise TypeError here); the
                # run is left untouched in that case -- confirm this is the
                # wanted policy.
                if (not self.submitted
                        or now() - self.submitted < timedelta(days=1)):
                    return self.is_complete
                self.is_complete = True
                self.is_error = True
                self.error_text = "Job Stopped"
                # Honour ``commit`` like the main save below does.
                if commit:
                    self.save()
                return self.is_complete

            self.job_state = data.get('state', '')
            if not self.submitted and 'submit' in data:
                self.submitted = data['submit']

            # Attempt to reclassify failures of quality control steps
            if self.job_state == 'FAILED' and self.program.quality_control:
                self.job_state = 'INVALID'

            final_duration_known = False
            if data.get('status', 'notfound') == 'finished':
                if data['finished'] and data['started']:
                    dur = data['finished'] - data['started']
                    # Round up any microseconds so a short job is non-zero.
                    self.duration = dur.total_seconds() + int(
                        dur.microseconds > 0)
                    final_duration_known = True
                self.completed = data['finished']
                self.is_complete = True
                self.is_error = data['return'] != 0
                self.error_text = ""
                if data['error']:
                    # Limit errors to 10k
                    self.error_text += data['error'][:10240]
                self.update_sizes()

            if data.get('started', None) is not None:
                if not self.is_started:
                    self.is_started = True
                    self.started = data['started']
                if not final_duration_known:
                    # Save the duration so far. Skipped once the final
                    # duration is known, otherwise the real run time would
                    # be replaced by the time elapsed until this poll.
                    dur = now() - data['started']
                    # Round up any microseconds, useful for testing
                    # non-zero time.
                    self.duration = dur.total_seconds() + int(
                        dur.microseconds > 0)

            if data and self.previous_id:
                # A failed predecessor means this job must not run either.
                for prev in ProgramRun.objects.filter(job_id=self.previous_id):
                    if prev.is_error:
                        job_manager.stop(self.job_id)
                        self.is_error = True

            if commit:
                self.save()

        # We're going to force an error out of hiding: a completed run whose
        # error text is the literal string 'None' gets the error re-read.
        # NOTE(review): this branch saves regardless of ``commit``.
        if self.is_complete and self.error_text == 'None':
            (_, error) = job_manager.job_read(self.job_id, 'err')
            if error is not None:
                self.error_text = "Broken JobID error: " + error
            else:
                self.error_text = "Lost error for {}".format(self.job_id)
            self.is_error = True
            self.save()

        return self.is_complete