Esempio n. 1
0
    def pull(self):
        """Synchronize ``self.jobs`` with the job lists returned by worker_pull.

        While holding ``self.lock_thread`` this calls ``worker_pull`` with
        ``self.uid`` and ``self.available()``, removes every entry of
        ``self.jobs`` whose ``['recipe']['setup']['uuid']`` is not in the
        returned ``jobs_all``, and appends the returned ``jobs_new``.

        After the lock is released, each removed job has its recorded
        process killed (when one is present), is passed to ``self.cleanup``
        and is reported through ``log_job_cancel``.  An exception raised
        while shutting down one removed job is logged with
        ``log_manager_error`` and does not prevent the remaining removed
        jobs from being shut down.
        """
        # The context manager releases the lock even if worker_pull or the
        # uuid lookup raises; a manual acquire/release pair would leave the
        # lock held forever and block every other user of lock_thread.
        with self.lock_thread:
            # NOTE(review): the previous comment described this call as
            # running outside the thread lock, but it has always executed
            # while the lock is held -- confirm the intent before moving it.
            jobs_all, jobs_new = worker_pull(self.uid, jobs=self.available())

            # remove all lost jobs from ping; walk backwards so popping by
            # index never shifts an entry that has not been examined yet
            jobs_remove = []
            for index in reversed(range(len(self.jobs))):
                if self.jobs[index]['recipe']['setup']['uuid'] not in jobs_all:
                    jobs_remove.append(self.jobs.pop(index))

            # add all new jobs to ping
            self.jobs.extend(jobs_new)

        # shut down all removed jobs; each job is guarded separately because
        # it is already gone from self.jobs and would otherwise be orphaned
        # if an earlier job in the list failed
        for job in jobs_remove:
            try:
                process = job.get('job', {}).get('process')
                if process:
                    process.kill()
                self.cleanup(job)
                log_job_cancel(job)
            except Exception:
                log_manager_error(traceback.format_exc())
    def handle(self, *args, **kwargs):
        """Run the manager loop, then shut the workers down and downscale.

        Reads the ``test``, ``verbose``, ``worker``, ``jobs``, ``timeout``
        and ``trace`` entries of ``kwargs``.  Sets the module globals
        ``MANAGER_ON`` and ``MANAGER_HEALTHY`` to True, builds a ``Workers``
        object and alternates ``pull`` / ``poll`` on it until either global
        becomes false.  The loop is stopped when ``workers.idle()`` is
        truthy, after a single pass when ``kwargs['test']`` is truthy, or on
        KeyboardInterrupt.  Any other exception is logged through
        ``log_manager_error``.
        """
        global MANAGER_ON
        global MANAGER_HEALTHY

        MANAGER_ON = True
        MANAGER_HEALTHY = True

        test_mode = kwargs['test']

        def announce(message):
            # progress messages are only printed in test mode
            if test_mode:
                print(message)

        announce('Starting Up...')

        if kwargs['verbose']:
            log_verbose()

        log_manager_start()

        announce('Initializing Workers...')

        # positional arguments for Workers, in the order it is called with
        worker_options = [
            kwargs[key] for key in ('worker', 'jobs', 'timeout', 'trace')
        ]
        workers = Workers(*worker_options)

        try:

            while MANAGER_HEALTHY and MANAGER_ON:

                # load jobs, pause, then evaluate them
                workers.pull()
                time.sleep(JOB_INTERVAL_MS / 1000)
                workers.poll()

                # an idle worker stops the loop; a busy one pauses again
                if not workers.idle():
                    time.sleep(JOB_INTERVAL_MS / 1000)
                else:
                    MANAGER_ON = False
                    log_manager_timeout()

                # test mode runs exactly one pass of the loop
                if test_mode:
                    MANAGER_ON = False

        except KeyboardInterrupt:
            MANAGER_ON = False

        except Exception as error:
            if test_mode:
                print(str(error))
            log_manager_error(traceback.format_exc())

        # workers are only shut down while the manager is still healthy
        if MANAGER_HEALTHY:
            announce('Shutting Down...')
            workers.shutdown()

        log_manager_end()

        # worker will terminate itself in a group safe way
        worker_downscale()
Esempio n. 3
0
 def ping(self):
   """Report the uuids of ``self.jobs`` through worker_ping in a loop.

   Each pass first waits on ``self.ping_event`` for ``JOB_INTERVAL_MS / 1000``
   seconds.  The loop ends when that wait returns a truthy value or when the
   module global ``MANAGER_HEALTHY`` is false.  If ``worker_ping`` (or
   building the uuid list) raises, the traceback is logged through
   ``log_manager_error`` and ``MANAGER_HEALTHY`` is set to False, which ends
   the loop on the next check.
   """
   global MANAGER_HEALTHY
   while MANAGER_HEALTHY and not self.ping_event.wait(JOB_INTERVAL_MS / 1000):
     # the context manager guarantees the lock is released on every exit
     # path, including errors that are not subclasses of Exception
     with self.lock_thread:
       try:
         worker_ping(self.uid,
                     [job['recipe']['setup']['uuid'] for job in self.jobs])
       except Exception:
         log_manager_error(traceback.format_exc())
         MANAGER_HEALTHY = False
Esempio n. 4
0
def worker_downscale():
    """Pass the name from get_instance_name() to group_instances_delete().

    An HttpError raised by either call is not propagated; it is reported
    through log_manager_error instead.
    """
    try:
        instance_name = get_instance_name()
        group_instances_delete(instance_name)
    except HttpError as error:
        message = 'WORKER DOWNSCALE NOT AVAILABLE: %s' % str(error)
        log_manager_error(message)