Example #1
    def run(self, job, force=False):

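        # add the job to this worker's job list under the thread lock so poll() can track it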
        self.lock_thread.acquire()
        self.jobs.append(job)
        self.lock_thread.release()

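        # fall back to the worker level default timeout if the recipe does not set one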
        job['recipe']['setup'].setdefault('timeout_seconds',
                                          self.timeout_seconds)

        job['job'] = {
            'worker': self.uid,
            'id': str(uuid.uuid1()),
            'process': None,
            'utc': datetime.utcnow(),
        }

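        # write the recipe to a job specific JSON file for the task process to read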
        filename = '%s/%s.json' % (settings.UI_CRON, job['job']['id'])

        with open(filename, 'w') as job_file:
            job_file.write(json.dumps(job['recipe'], default=str))

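        # build the command that runs the task's run.py using the StarThinker virtualenv python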
        command = [
            '%s/starthinker_virtualenv/bin/python' % settings.UI_ROOT,
            '-W',
            'ignore',
            '%s/starthinker/task/%s/run.py' %
            (settings.UI_ROOT, job['script']),
            filename,
            '-i',
            str(job['instance']),
            '--verbose',
        ]

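        # launch the task as a child process, then record and log JOB_START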
        job['job']['process'] = subprocess.Popen(command,
                                                 shell=False,
                                                 cwd=settings.UI_ROOT,
                                                 stdout=subprocess.PIPE,
                                                 stderr=subprocess.PIPE)
        worker_status(job['job']['worker'], job['recipe']['setup']['uuid'],
                      job['script'], job['instance'], job['hour'], 'JOB_START',
                      "", "")
        log_job_start(job)

    def test_worker(self):
        # manual mode ( without force always returns no tasks )
        job = worker_pull('SAMPLE_WORKER', jobs=1)
        self.assertEqual(len(job), 0)

        # advance time, since current jobs need to expire, artificially ping to keep out of queue
        sleep((JOB_LOOKBACK_MS * 2) / 1000.0)

        # remove time dependency for this test, force all tasks
        self.recipe.force()
        status = self.recipe.get_status()

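        # pull and complete each forced task in turn, reporting JOB_END for each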
        for task in status['tasks']:

            job = worker_pull('SAMPLE_WORKER', jobs=1)
            self.assertEqual(len(job), 1)
            job = job[0]

            self.assertEqual(job['event'], 'JOB_PENDING')
            self.assertEqual(job['instance'], task['instance'])
            self.assertEqual(job['hour'], task['hour'])

            # job is not run through the actual worker, so the 'job' key will be missing, simulate it
            job['job'] = {
                'worker': 'SAMPLE_WORKER',
                'id': 'SAMPLE_JOB_UUID',
                'process': None,
                'utc': datetime.utcnow(),
            }

            worker_status(job['job']['worker'], job['recipe']['setup']['uuid'],
                          job['script'], job['instance'], job['hour'],
                          'JOB_END', "Output is OK.", "")

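            # let the completed task age past the lookback window before pulling the next one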
            sleep((JOB_LOOKBACK_MS * 2) / 1000.0)

        # after completing all tasks, check if the whole recipe is done
        self.recipe.refresh_from_db()
        self.assertTrue(self.recipe.job_done)
        status = self.recipe.get_status()
        self.assertTrue(
            all([(task['event'] == 'JOB_END') for task in status['tasks']]))

    def poll(self):

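        # check each tracked job under the lock, handling timeouts, errors, and completions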
        self.lock_thread.acquire()

        for job in self.jobs:

            # if process still running, check timeout or ping keep alive
            poll = job['job']['process'].poll()
            if poll is None:

                # check if task is a timeout
                elapsed = (datetime.utcnow() - job['job']['utc']).total_seconds()
                if elapsed > job['recipe']['setup']['timeout_seconds']:
                    job['job']['process'].kill()
                    self.cleanup(job)
                    worker_status(job['job']['worker'],
                                  job['recipe']['setup']['uuid'],
                                  job['script'], job['instance'], job['hour'],
                                  'JOB_TIMEOUT', '', '')
                    log_job_timeout(job)
                    job['job']['process'] = None

                # otherwise task is running, do nothing
                else:
                    pass

            # if process has return code, check if task is complete or error
            else:
                job['stdout'], job['stderr'] = job['job'][
                    'process'].communicate()
                self.cleanup(job)

                # if error, clean up the job and flag the error
                if job['stderr']:  # possibly alter this to use poll != 0 ( which indicates error as well )
                    self.cleanup(job)
                    worker_status(job['job']['worker'],
                                  job['recipe']['setup']['uuid'],
                                  job['script'], job['instance'], job['hour'],
                                  'JOB_ERROR', job['stdout'], job['stderr'])
                    log_job_error(job)
                    job['job']['process'] = None

                # if success, flag success and mark the job for removal below
                else:
                    worker_status(job['job']['worker'],
                                  job['recipe']['setup']['uuid'],
                                  job['script'], job['instance'], job['hour'],
                                  'JOB_END', job['stdout'], job['stderr'])
                    log_job_end(job)
                    job['job']['process'] = None

        # remove all jobs without a process, they are done
        self.jobs = [
            job for job in self.jobs if job['job']['process'] is not None
        ]

        self.lock_thread.release()

        # if jobs remain, return True
        return bool(self.jobs)
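

# A minimal usage sketch, not part of the original source: it shows how run()
# and poll() above might be driven together. The Workers class name, its
# no-argument constructor, and the sleep cadence are assumptions made only
# for illustration.
if __name__ == '__main__':
    workers = Workers()  # hypothetical container exposing uid, run() and poll()
    while True:
        for pulled_job in worker_pull(workers.uid, jobs=1):  # fetch at most one pending job
            workers.run(pulled_job)  # launch it as a subprocess and flag JOB_START
        while workers.poll():  # keep polling while any subprocess is still running
            sleep(JOB_LOOKBACK_MS / 1000.0)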