def run(api_port=8082, address=None, scheduler=None, responder=None):
    """
    Start one API server instance and block inside the tornado IO loop.

    :param api_port: port forwarded to ``_init_api``.
    :param address: address forwarded to ``_init_api``.
    :param scheduler: scheduler to serve; a new ``CentralPlannerScheduler``
        is created when this is ``None``.
    :param responder: responder forwarded to ``_init_api``.

    The scheduler's ``load()`` is called before the API is initialised, its
    ``prune`` method is scheduled periodically, and a shutdown handler that
    calls ``dump()`` is installed for termination signals and interpreter
    exit.
    """
    if scheduler is None:
        scheduler = CentralPlannerScheduler()

    # load scheduler state
    scheduler.load()

    _init_api(scheduler, responder, api_port, address)

    # prune work DAG every 60 seconds (the callback period is given in ms)
    prune_period_ms = 60000
    tornado.ioloop.PeriodicCallback(scheduler.prune, prune_period_ms).start()

    def shutdown_handler(signum=None, frame=None):
        # Both arguments are optional because this one callable is used as
        # a signal handler (two positional args) and as an atexit hook
        # (no args).
        logger.info("Scheduler instance shutting down")
        scheduler.dump()
        # Exit immediately without running further interpreter cleanup.
        os._exit(0)

    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, shutdown_handler)

    # The third signal differs by platform: SIGBREAK on Windows, SIGQUIT
    # everywhere else.
    platform_sig = signal.SIGBREAK if os.name == 'nt' else signal.SIGQUIT
    signal.signal(platform_sig, shutdown_handler)

    atexit.register(shutdown_handler)

    logger.info("Scheduler starting up")

    tornado.ioloop.IOLoop.instance().start()
def __init__(self, scheduler=CentralPlannerScheduler(), worker_id=None,
             worker_processes=1, ping_interval=None, keep_alive=None,
             wait_interval=None, max_reschedules=None):
    """
    Set up the worker and start its keep-alive thread.

    :param scheduler: scheduler this worker talks to; it is stored on the
        instance and pinged periodically by the keep-alive thread.
    :param worker_id: identifier sent with every ping. When falsy, one is
        built from ``self._generate_worker_info()`` as
        ``'Worker(k=v, ...)'``.
    :param worker_processes: requested process count; forced to 1 (with a
        warning) when ``scheduler`` is a ``CentralPlannerScheduler``.
    :param ping_interval: seconds between pings. Defaults to the
        ``core/worker-ping-interval`` config value (fallback 1.0).
    :param keep_alive: defaults to ``core/worker-keep-alive``
        (fallback False).
    :param wait_interval: only resolved and stored when ``keep_alive`` is
        truthy; defaults to ``core/worker-wait-interval`` (fallback 1).
    :param max_reschedules: defaults to ``core/max-reschedules``
        (fallback 1).

    NOTE(review): the default ``scheduler`` is evaluated once, when this
    ``def`` is executed, so every instance relying on the default shares
    the same scheduler object. The signature is left as-is for
    compatibility.
    """
    self._worker_info = self._generate_worker_info()

    if not worker_id:
        worker_id = 'Worker(%s)' % ', '.join(
            ['%s=%s' % (k, v) for k, v in self._worker_info])

    config = configuration.get_config()

    if ping_interval is None:
        ping_interval = config.getfloat('core', 'worker-ping-interval', 1.0)

    if keep_alive is None:
        keep_alive = config.getboolean('core', 'worker-keep-alive', False)
    self.__keep_alive = keep_alive

    # The wait interval is only stored when keep-alive is enabled.
    if keep_alive:
        if wait_interval is None:
            wait_interval = config.getint('core', 'worker-wait-interval', 1)
        self.__wait_interval = wait_interval

    if max_reschedules is None:
        max_reschedules = config.getint('core', 'max-reschedules', 1)
    self.__max_reschedules = max_reschedules

    self._id = worker_id
    self._scheduler = scheduler
    if (isinstance(scheduler, CentralPlannerScheduler)
            and worker_processes != 1):
        warnings.warn(
            "Will only use one process when running with local in-process scheduler"
        )
        worker_processes = 1

    self.worker_processes = worker_processes
    self.host = socket.gethostname()
    self._scheduled_tasks = {}

    self.add_succeeded = True
    self.run_succeeded = True

    self.unfulfilled_counts = collections.defaultdict(int)

    class KeepAliveThread(threading.Thread):
        """ Periodically tell the scheduler that the worker still lives """

        def __init__(self):
            super(KeepAliveThread, self).__init__()
            self._should_stop = threading.Event()

        def stop(self):
            self._should_stop.set()

        def run(self):
            while True:
                # Doubles as the sleep between pings and as the stop
                # check: wait() returns early once stop() sets the event.
                self._should_stop.wait(ping_interval)
                if self._should_stop.is_set():
                    logger.info(
                        "Worker %s was stopped. Shutting down Keep-Alive thread",
                        worker_id)
                    break
                try:
                    scheduler.ping(worker=worker_id)
                except Exception:  # e.g. httplib.BadStatusLine
                    # Best effort: a failed ping must not kill the thread,
                    # but SystemExit/KeyboardInterrupt are no longer
                    # swallowed as they were by the former bare except.
                    logger.warning('Failed pinging scheduler')

    self._keep_alive_thread = KeepAliveThread()
    # Daemon thread, so it never blocks interpreter exit.
    self._keep_alive_thread.daemon = True
    self._keep_alive_thread.start()
def __init__(self, scheduler=CentralPlannerScheduler(), worker_id=None,
             worker_processes=1, ping_interval=None, keep_alive=None,
             wait_interval=None, max_reschedules=None, count_uniques=None,
             worker_timeout=None):
    """
    Set up the worker, start its keep-alive thread and create the
    bookkeeping used to track running tasks.

    :param scheduler: scheduler this worker talks to; it is stored on the
        instance and pinged periodically by the keep-alive thread.
    :param worker_id: identifier sent with every ping. When falsy, one is
        built from ``self._generate_worker_info()`` as
        ``'Worker(k=v, ...)'``.
    :param worker_processes: process count; coerced with ``int()``.
    :param ping_interval: seconds between pings. Defaults to the
        ``core/worker-ping-interval`` config value (fallback 1.0).
    :param keep_alive: defaults to ``core/worker-keep-alive``
        (fallback False).
    :param wait_interval: defaults to ``core/worker-wait-interval``
        (fallback 1).
    :param max_reschedules: defaults to ``core/max-reschedules``
        (fallback 1).
    :param count_uniques: defaults to ``core/worker-count-uniques``
        (fallback False).
    :param worker_timeout: defaults to ``core/worker-timeout``
        (fallback 0).

    NOTE(review): the default ``scheduler`` is evaluated once, when this
    ``def`` is executed, so every instance relying on the default shares
    the same scheduler object. The signature is left as-is for
    compatibility.
    """
    self.worker_processes = int(worker_processes)
    self._worker_info = self._generate_worker_info()

    if not worker_id:
        worker_id = 'Worker(%s)' % ', '.join(
            ['%s=%s' % (k, v) for k, v in self._worker_info])

    config = configuration.get_config()

    if ping_interval is None:
        ping_interval = config.getfloat('core', 'worker-ping-interval', 1.0)

    if keep_alive is None:
        keep_alive = config.getboolean('core', 'worker-keep-alive', False)
    self.__keep_alive = keep_alive

    # worker-count-uniques means that we will keep a worker alive only if
    # it has a unique pending task, as well as having keep-alive true
    if count_uniques is None:
        count_uniques = config.getboolean('core', 'worker-count-uniques', False)
    self.__count_uniques = count_uniques

    if wait_interval is None:
        wait_interval = config.getint('core', 'worker-wait-interval', 1)
    self.__wait_interval = wait_interval

    if max_reschedules is None:
        max_reschedules = config.getint('core', 'max-reschedules', 1)
    self.__max_reschedules = max_reschedules

    if worker_timeout is None:
        worker_timeout = configuration.get_config().getint('core', 'worker-timeout', 0)
    self.__worker_timeout = worker_timeout

    self._id = worker_id
    self._scheduler = scheduler

    self.host = socket.gethostname()
    self._scheduled_tasks = {}
    self._suspended_tasks = {}

    self._first_task = None

    self.add_succeeded = True
    self.run_succeeded = True
    self.unfulfilled_counts = collections.defaultdict(int)

    class KeepAliveThread(threading.Thread):
        """ Periodically tell the scheduler that the worker still lives. """

        def __init__(self):
            super(KeepAliveThread, self).__init__()
            self._should_stop = threading.Event()

        def stop(self):
            self._should_stop.set()

        def run(self):
            while True:
                # Doubles as the sleep between pings and as the stop
                # check: wait() returns early once stop() sets the event.
                self._should_stop.wait(ping_interval)
                if self._should_stop.is_set():
                    logger.info(
                        "Worker %s was stopped. Shutting down Keep-Alive thread",
                        worker_id)
                    break
                # NOTE(review): fork_lock is defined outside this block;
                # presumably it keeps a ping from overlapping a process
                # fork elsewhere in the module -- confirm.
                fork_lock.acquire()
                try:
                    scheduler.ping(worker=worker_id)
                except Exception:  # e.g. httplib.BadStatusLine
                    # Best effort: a failed ping must not kill the thread,
                    # but SystemExit/KeyboardInterrupt are no longer
                    # swallowed as they were by the former bare except.
                    logger.warning('Failed pinging scheduler')
                finally:
                    fork_lock.release()

    self._keep_alive_thread = KeepAliveThread()
    # Daemon thread, so it never blocks interpreter exit.
    self._keep_alive_thread.daemon = True
    self._keep_alive_thread.start()

    # Keep info about what tasks are running (could be in other processes)
    self._task_result_queue = multiprocessing.Queue()
    self._running_tasks = {}
def __init__(self, scheduler=CentralPlannerScheduler(), worker_id=None,
             worker_processes=1, ping_interval=None, keep_alive=None,
             wait_interval=None):
    """
    Set up the worker and start its keep-alive thread.

    :param scheduler: scheduler this worker talks to; it is stored on the
        instance and pinged periodically by the keep-alive thread.
    :param worker_id: identifier sent with every ping. When falsy, a
        random ``'worker-NNNNNNNNN'`` id is generated.
    :param worker_processes: requested process count; forced to 1 (with a
        warning) when ``scheduler`` is a ``CentralPlannerScheduler``.
    :param ping_interval: seconds between pings. Defaults to the
        ``core/worker-ping-interval`` config value (fallback 1.0).
    :param keep_alive: defaults to ``core/worker-keep-alive``
        (fallback False).
    :param wait_interval: only resolved and stored when ``keep_alive`` is
        truthy; defaults to ``core/worker-wait-interval`` (fallback 1).

    NOTE(review): the default ``scheduler`` is evaluated once, when this
    ``def`` is executed, so every instance relying on the default shares
    the same scheduler object. The signature is left as-is for
    compatibility.
    """
    if not worker_id:
        worker_id = 'worker-%09d' % random.randrange(0, 999999999)

    config = configuration.get_config()

    if ping_interval is None:
        ping_interval = config.getfloat('core', 'worker-ping-interval', 1.0)

    if keep_alive is None:
        keep_alive = config.getboolean('core', 'worker-keep-alive', False)
    self.__keep_alive = keep_alive

    # The wait interval is only stored when keep-alive is enabled.
    if keep_alive:
        if wait_interval is None:
            wait_interval = config.getint('core', 'worker-wait-interval', 1)
        self.__wait_interval = wait_interval

    self._id = worker_id
    self._scheduler = scheduler
    if (isinstance(scheduler, CentralPlannerScheduler)
            and worker_processes != 1):
        warnings.warn(
            "Will only use one process when running with local in-process scheduler"
        )
        worker_processes = 1

    self.worker_processes = worker_processes
    self.host = socket.gethostname()
    self._scheduled_tasks = {}

    # store the previous tasks executed by the same worker
    # for debugging reasons
    self._previous_tasks = []

    class KeepAliveThread(threading.Thread):
        """ Periodically tell the scheduler that the worker still lives """

        def __init__(self):
            super(KeepAliveThread, self).__init__()
            self._should_stop = threading.Event()

        def stop(self):
            self._should_stop.set()

        def run(self):
            while True:
                # Doubles as the sleep between pings and as the stop
                # check: wait() returns early once stop() sets the event.
                self._should_stop.wait(ping_interval)
                if self._should_stop.is_set():
                    logger.info(
                        "Worker was stopped. Shutting down Keep-Alive thread"
                    )
                    break
                try:
                    scheduler.ping(worker=worker_id)
                except Exception:  # e.g. httplib.BadStatusLine
                    # Best effort: a failed ping must not kill the thread,
                    # but SystemExit/KeyboardInterrupt are no longer
                    # swallowed as they were by the former bare except.
                    logger.warning('Failed pinging scheduler')

    self._keep_alive_thread = KeepAliveThread()
    # Daemon thread, so it never blocks interpreter exit.
    self._keep_alive_thread.daemon = True
    self._keep_alive_thread.start()