def pull(self):
    """Synchronize self.jobs with the job list returned by worker_pull().

    While holding self.lock_thread, fetches (jobs_all, jobs_new) for this
    worker, drops every tracked job whose recipe uuid is not in jobs_all,
    and appends jobs_new to self.jobs. Once the lock is released, each
    dropped job has its process killed (when one is recorded under
    job['job']['process']), is handed to self.cleanup(), and is reported
    through log_job_cancel().

    Errors raised while shutting down a dropped job are logged with
    log_manager_error() and do not prevent the remaining dropped jobs from
    being processed. Errors raised while the lock is held propagate to the
    caller, but only after the lock has been released.
    """
    jobs_remove = []

    self.lock_thread.acquire()
    try:
        # NOTE(review): the comment previously attached to this call described
        # it as slow and meant to run outside the thread lock, but the call is
        # made while the lock is held. That ordering is preserved here;
        # confirm the intent before moving it.
        jobs_all, jobs_new = worker_pull(self.uid, jobs=self.available())

        # remove all lost jobs from ping; walk backwards so deleting by index
        # never shifts an entry that still has to be inspected
        for index in range(len(self.jobs) - 1, -1, -1):
            if self.jobs[index]['recipe']['setup']['uuid'] not in jobs_all:
                jobs_remove.append(self.jobs[index])
                del self.jobs[index]

        # add all new jobs to ping
        self.jobs.extend(jobs_new)
    finally:
        # allow pings to resume with up to date list, even when the pull
        # failed, so the lock is never left held
        self.lock_thread.release()

    # shut down all removed jobs; failures are isolated per job so one bad
    # job cannot leave the others running
    for job in jobs_remove:
        try:
            if job.get('job', {}).get('process'):
                job['job']['process'].kill()
            self.cleanup(job)
            log_job_cancel(job)
        except Exception:
            log_manager_error(traceback.format_exc())
def handle(self, *args, **kwargs):
    """Run the manager loop until it is switched off or flagged unhealthy.

    Reads the 'test', 'verbose', 'worker', 'jobs', 'timeout' and 'trace'
    entries of kwargs. Resets the module-level MANAGER_ON and
    MANAGER_HEALTHY flags to True, builds a Workers instance, then
    repeatedly pulls and polls jobs, sleeping JOB_INTERVAL_MS / 1000
    seconds between steps. The loop ends when the workers report idle, on
    KeyboardInterrupt, on any other exception (which is logged), after a
    single pass when kwargs['test'] is truthy, or when MANAGER_HEALTHY
    becomes false.
    """
    global MANAGER_ON, MANAGER_HEALTHY

    MANAGER_ON = True
    MANAGER_HEALTHY = True

    def announce(message):
        # progress messages are only printed when running in test mode
        if kwargs['test']:
            print(message)

    def pause():
        time.sleep(JOB_INTERVAL_MS / 1000)

    announce('Starting Up...')

    if kwargs['verbose']:
        log_verbose()

    log_manager_start()

    announce('Initializing Workers...')

    workers = Workers(
        kwargs['worker'],
        kwargs['jobs'],
        kwargs['timeout'],
        kwargs['trace'],
    )

    try:
        while MANAGER_HEALTHY and MANAGER_ON:
            # load jobs
            workers.pull()
            pause()

            # evaluate jobs
            workers.poll()

            # check if worker needs to scale down
            if not workers.idle():
                pause()
            else:
                MANAGER_ON = False
                log_manager_timeout()

            # a test run stops after one pass through the loop
            if kwargs['test']:
                MANAGER_ON = False
    except KeyboardInterrupt:
        MANAGER_ON = False
    except Exception as error:
        if kwargs['test']:
            print(str(error))
        log_manager_error(traceback.format_exc())

    # workers are only shut down when the manager is still flagged healthy
    if MANAGER_HEALTHY:
        announce('Shutting Down...')
        workers.shutdown()

    # NOTE(review): the two calls below are treated as unconditional;
    # confirm they are not meant to sit under the MANAGER_HEALTHY check.
    log_manager_end()

    # worker will terminate itself in a group safe way
    worker_downscale()
def ping(self):
    """Report this worker's job uuids through worker_ping() until stopped.

    Each cycle waits on self.ping_event with a timeout argument of
    JOB_INTERVAL_MS / 1000 and continues only while that wait returns a
    false value and the module-level MANAGER_HEALTHY flag is true. While
    holding self.lock_thread it sends self.uid together with the recipe
    uuid of every entry in self.jobs.

    Any exception raised by the ping sets MANAGER_HEALTHY to False and is
    logged with log_manager_error(), which ends the loop on the next check.
    The lock is released on every path.
    """
    global MANAGER_HEALTHY

    while MANAGER_HEALTHY and not self.ping_event.wait(JOB_INTERVAL_MS / 1000):
        self.lock_thread.acquire()
        try:
            worker_ping(
                self.uid,
                [job['recipe']['setup']['uuid'] for job in self.jobs],
            )
        except Exception:
            # flag the manager first so the failure is recorded even if the
            # logging call itself raises
            MANAGER_HEALTHY = False
            log_manager_error(traceback.format_exc())
        finally:
            # never leave the lock held, otherwise pull() would block forever
            self.lock_thread.release()
def worker_downscale():
    """Pass this instance's name to group_instances_delete().

    The name comes from get_instance_name(). An HttpError raised by either
    call is not propagated; it is reported through log_manager_error()
    instead. Any other exception type is left to the caller.
    """
    try:
        instance_name = get_instance_name()
        group_instances_delete(instance_name)
    except HttpError as error:
        reason = str(error)
        log_manager_error('WORKER DOWNSCALE NOT AVAILABLE: %s' % reason)